Commit 4262e5ccbbb5171abd2921eed16ed339633d6478

Authored by Daniel Borkmann
Committed by David S. Miller
1 parent 34f9f43710

net: dev: move inline skb_needs_linearize helper to header

As we need it elsewhere, move the inline helper function
skb_needs_linearize() over to the skbuff.h include file. While
at it, also convert the return type to 'bool' instead of 'int'
and add a proper kernel-doc comment.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
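
For reference, the helper being moved looks roughly like the sketch below. This is a
reconstruction from the commit description, not the literal hunk: the authoritative version
is the one removed from net/core/dev.c and re-added at the end of include/linux/skbuff.h,
which falls outside the portion of the diff shown here. The helper reports whether an skb
has to be linearized before it is handed to a device whose features lack scatter/gather
(NETIF_F_SG) or frag-list (NETIF_F_FRAGLIST) support:

/* Reconstructed sketch of the moved helper; see the actual diff for the exact hunk. */
/**
 *	skb_needs_linearize - check if we need to linearize a given skb
 *			      depending on the given device features
 *	@skb: socket buffer to check
 *	@features: net device features
 *
 *	Returns true if either:
 *	1. skb has frag_list and the device doesn't support FRAGLIST, or
 *	2. skb is fragmented and the device does not support SG.
 */
static inline bool skb_needs_linearize(struct sk_buff *skb,
				       netdev_features_t features)
{
	return skb_is_nonlinear(skb) &&
	       ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
		(skb_shinfo(skb)->nr_frags && !(features & NETIF_F_SG)));
}

A call site in the transmit path might then look like this (label name illustrative):

	if (skb_needs_linearize(skb, features) &&
	    __skb_linearize(skb))
		goto drop;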

Showing 2 changed files with 18 additions and 15 deletions

include/linux/skbuff.h
1 /* 1 /*
2 * Definitions for the 'struct sk_buff' memory handlers. 2 * Definitions for the 'struct sk_buff' memory handlers.
3 * 3 *
4 * Authors: 4 * Authors:
5 * Alan Cox, <gw4pts@gw4pts.ampr.org> 5 * Alan Cox, <gw4pts@gw4pts.ampr.org>
6 * Florian La Roche, <rzsfl@rz.uni-sb.de> 6 * Florian La Roche, <rzsfl@rz.uni-sb.de>
7 * 7 *
8 * This program is free software; you can redistribute it and/or 8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License 9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version 10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version. 11 * 2 of the License, or (at your option) any later version.
12 */ 12 */
13 13
14 #ifndef _LINUX_SKBUFF_H 14 #ifndef _LINUX_SKBUFF_H
15 #define _LINUX_SKBUFF_H 15 #define _LINUX_SKBUFF_H
16 16
17 #include <linux/kernel.h> 17 #include <linux/kernel.h>
18 #include <linux/kmemcheck.h> 18 #include <linux/kmemcheck.h>
19 #include <linux/compiler.h> 19 #include <linux/compiler.h>
20 #include <linux/time.h> 20 #include <linux/time.h>
21 #include <linux/bug.h> 21 #include <linux/bug.h>
22 #include <linux/cache.h> 22 #include <linux/cache.h>
23 23
24 #include <linux/atomic.h> 24 #include <linux/atomic.h>
25 #include <asm/types.h> 25 #include <asm/types.h>
26 #include <linux/spinlock.h> 26 #include <linux/spinlock.h>
27 #include <linux/net.h> 27 #include <linux/net.h>
28 #include <linux/textsearch.h> 28 #include <linux/textsearch.h>
29 #include <net/checksum.h> 29 #include <net/checksum.h>
30 #include <linux/rcupdate.h> 30 #include <linux/rcupdate.h>
31 #include <linux/dmaengine.h> 31 #include <linux/dmaengine.h>
32 #include <linux/hrtimer.h> 32 #include <linux/hrtimer.h>
33 #include <linux/dma-mapping.h> 33 #include <linux/dma-mapping.h>
34 #include <linux/netdev_features.h> 34 #include <linux/netdev_features.h>
35 #include <net/flow_keys.h> 35 #include <net/flow_keys.h>
36 36
37 /* Don't change this without changing skb_csum_unnecessary! */ 37 /* Don't change this without changing skb_csum_unnecessary! */
38 #define CHECKSUM_NONE 0 38 #define CHECKSUM_NONE 0
39 #define CHECKSUM_UNNECESSARY 1 39 #define CHECKSUM_UNNECESSARY 1
40 #define CHECKSUM_COMPLETE 2 40 #define CHECKSUM_COMPLETE 2
41 #define CHECKSUM_PARTIAL 3 41 #define CHECKSUM_PARTIAL 3
42 42
43 #define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \ 43 #define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \
44 ~(SMP_CACHE_BYTES - 1)) 44 ~(SMP_CACHE_BYTES - 1))
45 #define SKB_WITH_OVERHEAD(X) \ 45 #define SKB_WITH_OVERHEAD(X) \
46 ((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) 46 ((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
47 #define SKB_MAX_ORDER(X, ORDER) \ 47 #define SKB_MAX_ORDER(X, ORDER) \
48 SKB_WITH_OVERHEAD((PAGE_SIZE << (ORDER)) - (X)) 48 SKB_WITH_OVERHEAD((PAGE_SIZE << (ORDER)) - (X))
49 #define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0)) 49 #define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0))
50 #define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2)) 50 #define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2))
51 51
52 /* return minimum truesize of one skb containing X bytes of data */ 52 /* return minimum truesize of one skb containing X bytes of data */
53 #define SKB_TRUESIZE(X) ((X) + \ 53 #define SKB_TRUESIZE(X) ((X) + \
54 SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \ 54 SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \
55 SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) 55 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
56 56
57 /* A. Checksumming of received packets by device. 57 /* A. Checksumming of received packets by device.
58 * 58 *
59 * NONE: device failed to checksum this packet. 59 * NONE: device failed to checksum this packet.
60 * skb->csum is undefined. 60 * skb->csum is undefined.
61 * 61 *
62 * UNNECESSARY: device parsed packet and wouldbe verified checksum. 62 * UNNECESSARY: device parsed packet and wouldbe verified checksum.
63 * skb->csum is undefined. 63 * skb->csum is undefined.
64 * It is bad option, but, unfortunately, many of vendors do this. 64 * It is bad option, but, unfortunately, many of vendors do this.
65 * Apparently with secret goal to sell you new device, when you 65 * Apparently with secret goal to sell you new device, when you
66 * will add new protocol to your host. F.e. IPv6. 8) 66 * will add new protocol to your host. F.e. IPv6. 8)
67 * 67 *
68 * COMPLETE: the most generic way. Device supplied checksum of _all_ 68 * COMPLETE: the most generic way. Device supplied checksum of _all_
69 * the packet as seen by netif_rx in skb->csum. 69 * the packet as seen by netif_rx in skb->csum.
70 * NOTE: Even if device supports only some protocols, but 70 * NOTE: Even if device supports only some protocols, but
71 * is able to produce some skb->csum, it MUST use COMPLETE, 71 * is able to produce some skb->csum, it MUST use COMPLETE,
72 * not UNNECESSARY. 72 * not UNNECESSARY.
73 * 73 *
74 * PARTIAL: identical to the case for output below. This may occur 74 * PARTIAL: identical to the case for output below. This may occur
75 * on a packet received directly from another Linux OS, e.g., 75 * on a packet received directly from another Linux OS, e.g.,
76 * a virtualised Linux kernel on the same host. The packet can 76 * a virtualised Linux kernel on the same host. The packet can
77 * be treated in the same way as UNNECESSARY except that on 77 * be treated in the same way as UNNECESSARY except that on
78 * output (i.e., forwarding) the checksum must be filled in 78 * output (i.e., forwarding) the checksum must be filled in
79 * by the OS or the hardware. 79 * by the OS or the hardware.
80 * 80 *
81 * B. Checksumming on output. 81 * B. Checksumming on output.
82 * 82 *
83 * NONE: skb is checksummed by protocol or csum is not required. 83 * NONE: skb is checksummed by protocol or csum is not required.
84 * 84 *
85 * PARTIAL: device is required to csum packet as seen by hard_start_xmit 85 * PARTIAL: device is required to csum packet as seen by hard_start_xmit
86 * from skb->csum_start to the end and to record the checksum 86 * from skb->csum_start to the end and to record the checksum
87 * at skb->csum_start + skb->csum_offset. 87 * at skb->csum_start + skb->csum_offset.
88 * 88 *
89 * Device must show its capabilities in dev->features, set 89 * Device must show its capabilities in dev->features, set
90 * at device setup time. 90 * at device setup time.
91 * NETIF_F_HW_CSUM - it is clever device, it is able to checksum 91 * NETIF_F_HW_CSUM - it is clever device, it is able to checksum
92 * everything. 92 * everything.
93 * NETIF_F_IP_CSUM - device is dumb. It is able to csum only 93 * NETIF_F_IP_CSUM - device is dumb. It is able to csum only
94 * TCP/UDP over IPv4. Sigh. Vendors like this 94 * TCP/UDP over IPv4. Sigh. Vendors like this
95 * way by an unknown reason. Though, see comment above 95 * way by an unknown reason. Though, see comment above
96 * about CHECKSUM_UNNECESSARY. 8) 96 * about CHECKSUM_UNNECESSARY. 8)
97 * NETIF_F_IPV6_CSUM about as dumb as the last one but does IPv6 instead. 97 * NETIF_F_IPV6_CSUM about as dumb as the last one but does IPv6 instead.
98 * 98 *
99 * UNNECESSARY: device will do per protocol specific csum. Protocol drivers 99 * UNNECESSARY: device will do per protocol specific csum. Protocol drivers
100 * that do not want net to perform the checksum calculation should use 100 * that do not want net to perform the checksum calculation should use
101 * this flag in their outgoing skbs. 101 * this flag in their outgoing skbs.
102 * NETIF_F_FCOE_CRC this indicates the device can do FCoE FC CRC 102 * NETIF_F_FCOE_CRC this indicates the device can do FCoE FC CRC
103 * offload. Correspondingly, the FCoE protocol driver 103 * offload. Correspondingly, the FCoE protocol driver
104 * stack should use CHECKSUM_UNNECESSARY. 104 * stack should use CHECKSUM_UNNECESSARY.
105 * 105 *
106 * Any questions? No questions, good. --ANK 106 * Any questions? No questions, good. --ANK
107 */ 107 */
108 108
109 struct net_device; 109 struct net_device;
110 struct scatterlist; 110 struct scatterlist;
111 struct pipe_inode_info; 111 struct pipe_inode_info;
112 112
113 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 113 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
114 struct nf_conntrack { 114 struct nf_conntrack {
115 atomic_t use; 115 atomic_t use;
116 }; 116 };
117 #endif 117 #endif
118 118
119 #ifdef CONFIG_BRIDGE_NETFILTER 119 #ifdef CONFIG_BRIDGE_NETFILTER
120 struct nf_bridge_info { 120 struct nf_bridge_info {
121 atomic_t use; 121 atomic_t use;
122 unsigned int mask; 122 unsigned int mask;
123 struct net_device *physindev; 123 struct net_device *physindev;
124 struct net_device *physoutdev; 124 struct net_device *physoutdev;
125 unsigned long data[32 / sizeof(unsigned long)]; 125 unsigned long data[32 / sizeof(unsigned long)];
126 }; 126 };
127 #endif 127 #endif
128 128
129 struct sk_buff_head { 129 struct sk_buff_head {
130 /* These two members must be first. */ 130 /* These two members must be first. */
131 struct sk_buff *next; 131 struct sk_buff *next;
132 struct sk_buff *prev; 132 struct sk_buff *prev;
133 133
134 __u32 qlen; 134 __u32 qlen;
135 spinlock_t lock; 135 spinlock_t lock;
136 }; 136 };
137 137
138 struct sk_buff; 138 struct sk_buff;
139 139
140 /* To allow 64K frame to be packed as single skb without frag_list we 140 /* To allow 64K frame to be packed as single skb without frag_list we
141 * require 64K/PAGE_SIZE pages plus 1 additional page to allow for 141 * require 64K/PAGE_SIZE pages plus 1 additional page to allow for
142 * buffers which do not start on a page boundary. 142 * buffers which do not start on a page boundary.
143 * 143 *
144 * Since GRO uses frags we allocate at least 16 regardless of page 144 * Since GRO uses frags we allocate at least 16 regardless of page
145 * size. 145 * size.
146 */ 146 */
147 #if (65536/PAGE_SIZE + 1) < 16 147 #if (65536/PAGE_SIZE + 1) < 16
148 #define MAX_SKB_FRAGS 16UL 148 #define MAX_SKB_FRAGS 16UL
149 #else 149 #else
150 #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1) 150 #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1)
151 #endif 151 #endif
152 152
153 typedef struct skb_frag_struct skb_frag_t; 153 typedef struct skb_frag_struct skb_frag_t;
154 154
155 struct skb_frag_struct { 155 struct skb_frag_struct {
156 struct { 156 struct {
157 struct page *p; 157 struct page *p;
158 } page; 158 } page;
159 #if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) 159 #if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
160 __u32 page_offset; 160 __u32 page_offset;
161 __u32 size; 161 __u32 size;
162 #else 162 #else
163 __u16 page_offset; 163 __u16 page_offset;
164 __u16 size; 164 __u16 size;
165 #endif 165 #endif
166 }; 166 };
167 167
168 static inline unsigned int skb_frag_size(const skb_frag_t *frag) 168 static inline unsigned int skb_frag_size(const skb_frag_t *frag)
169 { 169 {
170 return frag->size; 170 return frag->size;
171 } 171 }
172 172
173 static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) 173 static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
174 { 174 {
175 frag->size = size; 175 frag->size = size;
176 } 176 }
177 177
178 static inline void skb_frag_size_add(skb_frag_t *frag, int delta) 178 static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
179 { 179 {
180 frag->size += delta; 180 frag->size += delta;
181 } 181 }
182 182
183 static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) 183 static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
184 { 184 {
185 frag->size -= delta; 185 frag->size -= delta;
186 } 186 }
187 187
188 #define HAVE_HW_TIME_STAMP 188 #define HAVE_HW_TIME_STAMP
189 189
190 /** 190 /**
191 * struct skb_shared_hwtstamps - hardware time stamps 191 * struct skb_shared_hwtstamps - hardware time stamps
192 * @hwtstamp: hardware time stamp transformed into duration 192 * @hwtstamp: hardware time stamp transformed into duration
193 * since arbitrary point in time 193 * since arbitrary point in time
194 * @syststamp: hwtstamp transformed to system time base 194 * @syststamp: hwtstamp transformed to system time base
195 * 195 *
196 * Software time stamps generated by ktime_get_real() are stored in 196 * Software time stamps generated by ktime_get_real() are stored in
197 * skb->tstamp. The relation between the different kinds of time 197 * skb->tstamp. The relation between the different kinds of time
198 * stamps is as follows: 198 * stamps is as follows:
199 * 199 *
200 * syststamp and tstamp can be compared against each other in 200 * syststamp and tstamp can be compared against each other in
201 * arbitrary combinations. The accuracy of a 201 * arbitrary combinations. The accuracy of a
202 * syststamp/tstamp/"syststamp from other device" comparison is 202 * syststamp/tstamp/"syststamp from other device" comparison is
203 * limited by the accuracy of the transformation into system time 203 * limited by the accuracy of the transformation into system time
204 * base. This depends on the device driver and its underlying 204 * base. This depends on the device driver and its underlying
205 * hardware. 205 * hardware.
206 * 206 *
207 * hwtstamps can only be compared against other hwtstamps from 207 * hwtstamps can only be compared against other hwtstamps from
208 * the same device. 208 * the same device.
209 * 209 *
210 * This structure is attached to packets as part of the 210 * This structure is attached to packets as part of the
211 * &skb_shared_info. Use skb_hwtstamps() to get a pointer. 211 * &skb_shared_info. Use skb_hwtstamps() to get a pointer.
212 */ 212 */
213 struct skb_shared_hwtstamps { 213 struct skb_shared_hwtstamps {
214 ktime_t hwtstamp; 214 ktime_t hwtstamp;
215 ktime_t syststamp; 215 ktime_t syststamp;
216 }; 216 };
217 217
218 /* Definitions for tx_flags in struct skb_shared_info */ 218 /* Definitions for tx_flags in struct skb_shared_info */
219 enum { 219 enum {
220 /* generate hardware time stamp */ 220 /* generate hardware time stamp */
221 SKBTX_HW_TSTAMP = 1 << 0, 221 SKBTX_HW_TSTAMP = 1 << 0,
222 222
223 /* generate software time stamp */ 223 /* generate software time stamp */
224 SKBTX_SW_TSTAMP = 1 << 1, 224 SKBTX_SW_TSTAMP = 1 << 1,
225 225
226 /* device driver is going to provide hardware time stamp */ 226 /* device driver is going to provide hardware time stamp */
227 SKBTX_IN_PROGRESS = 1 << 2, 227 SKBTX_IN_PROGRESS = 1 << 2,
228 228
229 /* device driver supports TX zero-copy buffers */ 229 /* device driver supports TX zero-copy buffers */
230 SKBTX_DEV_ZEROCOPY = 1 << 3, 230 SKBTX_DEV_ZEROCOPY = 1 << 3,
231 231
232 /* generate wifi status information (where possible) */ 232 /* generate wifi status information (where possible) */
233 SKBTX_WIFI_STATUS = 1 << 4, 233 SKBTX_WIFI_STATUS = 1 << 4,
234 234
235 /* This indicates at least one fragment might be overwritten 235 /* This indicates at least one fragment might be overwritten
236 * (as in vmsplice(), sendfile() ...) 236 * (as in vmsplice(), sendfile() ...)
237 * If we need to compute a TX checksum, we'll need to copy 237 * If we need to compute a TX checksum, we'll need to copy
238 * all frags to avoid possible bad checksum 238 * all frags to avoid possible bad checksum
239 */ 239 */
240 SKBTX_SHARED_FRAG = 1 << 5, 240 SKBTX_SHARED_FRAG = 1 << 5,
241 }; 241 };
242 242
243 /* 243 /*
244 * The callback notifies userspace to release buffers when skb DMA is done in 244 * The callback notifies userspace to release buffers when skb DMA is done in
245 * lower device, the skb last reference should be 0 when calling this. 245 * lower device, the skb last reference should be 0 when calling this.
246 * The zerocopy_success argument is true if zero copy transmit occurred, 246 * The zerocopy_success argument is true if zero copy transmit occurred,
247 * false on data copy or out of memory error caused by data copy attempt. 247 * false on data copy or out of memory error caused by data copy attempt.
248 * The ctx field is used to track device context. 248 * The ctx field is used to track device context.
249 * The desc field is used to track userspace buffer index. 249 * The desc field is used to track userspace buffer index.
250 */ 250 */
251 struct ubuf_info { 251 struct ubuf_info {
252 void (*callback)(struct ubuf_info *, bool zerocopy_success); 252 void (*callback)(struct ubuf_info *, bool zerocopy_success);
253 void *ctx; 253 void *ctx;
254 unsigned long desc; 254 unsigned long desc;
255 }; 255 };
256 256
257 /* This data is invariant across clones and lives at 257 /* This data is invariant across clones and lives at
258 * the end of the header data, ie. at skb->end. 258 * the end of the header data, ie. at skb->end.
259 */ 259 */
260 struct skb_shared_info { 260 struct skb_shared_info {
261 unsigned char nr_frags; 261 unsigned char nr_frags;
262 __u8 tx_flags; 262 __u8 tx_flags;
263 unsigned short gso_size; 263 unsigned short gso_size;
264 /* Warning: this field is not always filled in (UFO)! */ 264 /* Warning: this field is not always filled in (UFO)! */
265 unsigned short gso_segs; 265 unsigned short gso_segs;
266 unsigned short gso_type; 266 unsigned short gso_type;
267 struct sk_buff *frag_list; 267 struct sk_buff *frag_list;
268 struct skb_shared_hwtstamps hwtstamps; 268 struct skb_shared_hwtstamps hwtstamps;
269 __be32 ip6_frag_id; 269 __be32 ip6_frag_id;
270 270
271 /* 271 /*
272 * Warning : all fields before dataref are cleared in __alloc_skb() 272 * Warning : all fields before dataref are cleared in __alloc_skb()
273 */ 273 */
274 atomic_t dataref; 274 atomic_t dataref;
275 275
276 /* Intermediate layers must ensure that destructor_arg 276 /* Intermediate layers must ensure that destructor_arg
277 * remains valid until skb destructor */ 277 * remains valid until skb destructor */
278 void * destructor_arg; 278 void * destructor_arg;
279 279
280 /* must be last field, see pskb_expand_head() */ 280 /* must be last field, see pskb_expand_head() */
281 skb_frag_t frags[MAX_SKB_FRAGS]; 281 skb_frag_t frags[MAX_SKB_FRAGS];
282 }; 282 };
283 283
284 /* We divide dataref into two halves. The higher 16 bits hold references 284 /* We divide dataref into two halves. The higher 16 bits hold references
285 * to the payload part of skb->data. The lower 16 bits hold references to 285 * to the payload part of skb->data. The lower 16 bits hold references to
286 * the entire skb->data. A clone of a headerless skb holds the length of 286 * the entire skb->data. A clone of a headerless skb holds the length of
287 * the header in skb->hdr_len. 287 * the header in skb->hdr_len.
288 * 288 *
289 * All users must obey the rule that the skb->data reference count must be 289 * All users must obey the rule that the skb->data reference count must be
290 * greater than or equal to the payload reference count. 290 * greater than or equal to the payload reference count.
291 * 291 *
292 * Holding a reference to the payload part means that the user does not 292 * Holding a reference to the payload part means that the user does not
293 * care about modifications to the header part of skb->data. 293 * care about modifications to the header part of skb->data.
294 */ 294 */
295 #define SKB_DATAREF_SHIFT 16 295 #define SKB_DATAREF_SHIFT 16
296 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) 296 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1)
297 297
298 298
299 enum { 299 enum {
300 SKB_FCLONE_UNAVAILABLE, 300 SKB_FCLONE_UNAVAILABLE,
301 SKB_FCLONE_ORIG, 301 SKB_FCLONE_ORIG,
302 SKB_FCLONE_CLONE, 302 SKB_FCLONE_CLONE,
303 }; 303 };
304 304
305 enum { 305 enum {
306 SKB_GSO_TCPV4 = 1 << 0, 306 SKB_GSO_TCPV4 = 1 << 0,
307 SKB_GSO_UDP = 1 << 1, 307 SKB_GSO_UDP = 1 << 1,
308 308
309 /* This indicates the skb is from an untrusted source. */ 309 /* This indicates the skb is from an untrusted source. */
310 SKB_GSO_DODGY = 1 << 2, 310 SKB_GSO_DODGY = 1 << 2,
311 311
312 /* This indicates the tcp segment has CWR set. */ 312 /* This indicates the tcp segment has CWR set. */
313 SKB_GSO_TCP_ECN = 1 << 3, 313 SKB_GSO_TCP_ECN = 1 << 3,
314 314
315 SKB_GSO_TCPV6 = 1 << 4, 315 SKB_GSO_TCPV6 = 1 << 4,
316 316
317 SKB_GSO_FCOE = 1 << 5, 317 SKB_GSO_FCOE = 1 << 5,
318 318
319 SKB_GSO_GRE = 1 << 6, 319 SKB_GSO_GRE = 1 << 6,
320 320
321 SKB_GSO_IPIP = 1 << 7, 321 SKB_GSO_IPIP = 1 << 7,
322 322
323 SKB_GSO_SIT = 1 << 8, 323 SKB_GSO_SIT = 1 << 8,
324 324
325 SKB_GSO_UDP_TUNNEL = 1 << 9, 325 SKB_GSO_UDP_TUNNEL = 1 << 9,
326 326
327 SKB_GSO_MPLS = 1 << 10, 327 SKB_GSO_MPLS = 1 << 10,
328 }; 328 };
329 329
330 #if BITS_PER_LONG > 32 330 #if BITS_PER_LONG > 32
331 #define NET_SKBUFF_DATA_USES_OFFSET 1 331 #define NET_SKBUFF_DATA_USES_OFFSET 1
332 #endif 332 #endif
333 333
334 #ifdef NET_SKBUFF_DATA_USES_OFFSET 334 #ifdef NET_SKBUFF_DATA_USES_OFFSET
335 typedef unsigned int sk_buff_data_t; 335 typedef unsigned int sk_buff_data_t;
336 #else 336 #else
337 typedef unsigned char *sk_buff_data_t; 337 typedef unsigned char *sk_buff_data_t;
338 #endif 338 #endif
339 339
340 /** 340 /**
341 * struct sk_buff - socket buffer 341 * struct sk_buff - socket buffer
342 * @next: Next buffer in list 342 * @next: Next buffer in list
343 * @prev: Previous buffer in list 343 * @prev: Previous buffer in list
344 * @tstamp: Time we arrived 344 * @tstamp: Time we arrived
345 * @sk: Socket we are owned by 345 * @sk: Socket we are owned by
346 * @dev: Device we arrived on/are leaving by 346 * @dev: Device we arrived on/are leaving by
347 * @cb: Control buffer. Free for use by every layer. Put private vars here 347 * @cb: Control buffer. Free for use by every layer. Put private vars here
348 * @_skb_refdst: destination entry (with norefcount bit) 348 * @_skb_refdst: destination entry (with norefcount bit)
349 * @sp: the security path, used for xfrm 349 * @sp: the security path, used for xfrm
350 * @len: Length of actual data 350 * @len: Length of actual data
351 * @data_len: Data length 351 * @data_len: Data length
352 * @mac_len: Length of link layer header 352 * @mac_len: Length of link layer header
353 * @hdr_len: writable header length of cloned skb 353 * @hdr_len: writable header length of cloned skb
354 * @csum: Checksum (must include start/offset pair) 354 * @csum: Checksum (must include start/offset pair)
355 * @csum_start: Offset from skb->head where checksumming should start 355 * @csum_start: Offset from skb->head where checksumming should start
356 * @csum_offset: Offset from csum_start where checksum should be stored 356 * @csum_offset: Offset from csum_start where checksum should be stored
357 * @priority: Packet queueing priority 357 * @priority: Packet queueing priority
358 * @local_df: allow local fragmentation 358 * @local_df: allow local fragmentation
359 * @cloned: Head may be cloned (check refcnt to be sure) 359 * @cloned: Head may be cloned (check refcnt to be sure)
360 * @ip_summed: Driver fed us an IP checksum 360 * @ip_summed: Driver fed us an IP checksum
361 * @nohdr: Payload reference only, must not modify header 361 * @nohdr: Payload reference only, must not modify header
362 * @nfctinfo: Relationship of this skb to the connection 362 * @nfctinfo: Relationship of this skb to the connection
363 * @pkt_type: Packet class 363 * @pkt_type: Packet class
364 * @fclone: skbuff clone status 364 * @fclone: skbuff clone status
365 * @ipvs_property: skbuff is owned by ipvs 365 * @ipvs_property: skbuff is owned by ipvs
366 * @peeked: this packet has been seen already, so stats have been 366 * @peeked: this packet has been seen already, so stats have been
367 * done for it, don't do them again 367 * done for it, don't do them again
368 * @nf_trace: netfilter packet trace flag 368 * @nf_trace: netfilter packet trace flag
369 * @protocol: Packet protocol from driver 369 * @protocol: Packet protocol from driver
370 * @destructor: Destruct function 370 * @destructor: Destruct function
371 * @nfct: Associated connection, if any 371 * @nfct: Associated connection, if any
372 * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c 372 * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
373 * @skb_iif: ifindex of device we arrived on 373 * @skb_iif: ifindex of device we arrived on
374 * @tc_index: Traffic control index 374 * @tc_index: Traffic control index
375 * @tc_verd: traffic control verdict 375 * @tc_verd: traffic control verdict
376 * @rxhash: the packet hash computed on receive 376 * @rxhash: the packet hash computed on receive
377 * @queue_mapping: Queue mapping for multiqueue devices 377 * @queue_mapping: Queue mapping for multiqueue devices
378 * @ndisc_nodetype: router type (from link layer) 378 * @ndisc_nodetype: router type (from link layer)
379 * @ooo_okay: allow the mapping of a socket to a queue to be changed 379 * @ooo_okay: allow the mapping of a socket to a queue to be changed
380 * @l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport 380 * @l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport
381 * ports. 381 * ports.
382 * @wifi_acked_valid: wifi_acked was set 382 * @wifi_acked_valid: wifi_acked was set
383 * @wifi_acked: whether frame was acked on wifi or not 383 * @wifi_acked: whether frame was acked on wifi or not
384 * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS 384 * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
385 * @dma_cookie: a cookie to one of several possible DMA operations 385 * @dma_cookie: a cookie to one of several possible DMA operations
386 * done by skb DMA functions 386 * done by skb DMA functions
387 * @napi_id: id of the NAPI struct this skb came from 387 * @napi_id: id of the NAPI struct this skb came from
388 * @secmark: security marking 388 * @secmark: security marking
389 * @mark: Generic packet mark 389 * @mark: Generic packet mark
390 * @dropcount: total number of sk_receive_queue overflows 390 * @dropcount: total number of sk_receive_queue overflows
391 * @vlan_proto: vlan encapsulation protocol 391 * @vlan_proto: vlan encapsulation protocol
392 * @vlan_tci: vlan tag control information 392 * @vlan_tci: vlan tag control information
393 * @inner_protocol: Protocol (encapsulation) 393 * @inner_protocol: Protocol (encapsulation)
394 * @inner_transport_header: Inner transport layer header (encapsulation) 394 * @inner_transport_header: Inner transport layer header (encapsulation)
395 * @inner_network_header: Network layer header (encapsulation) 395 * @inner_network_header: Network layer header (encapsulation)
396 * @inner_mac_header: Link layer header (encapsulation) 396 * @inner_mac_header: Link layer header (encapsulation)
397 * @transport_header: Transport layer header 397 * @transport_header: Transport layer header
398 * @network_header: Network layer header 398 * @network_header: Network layer header
399 * @mac_header: Link layer header 399 * @mac_header: Link layer header
400 * @tail: Tail pointer 400 * @tail: Tail pointer
401 * @end: End pointer 401 * @end: End pointer
402 * @head: Head of buffer 402 * @head: Head of buffer
403 * @data: Data head pointer 403 * @data: Data head pointer
404 * @truesize: Buffer size 404 * @truesize: Buffer size
405 * @users: User count - see {datagram,tcp}.c 405 * @users: User count - see {datagram,tcp}.c
406 */ 406 */
407 407
408 struct sk_buff { 408 struct sk_buff {
409 /* These two members must be first. */ 409 /* These two members must be first. */
410 struct sk_buff *next; 410 struct sk_buff *next;
411 struct sk_buff *prev; 411 struct sk_buff *prev;
412 412
413 ktime_t tstamp; 413 ktime_t tstamp;
414 414
415 struct sock *sk; 415 struct sock *sk;
416 struct net_device *dev; 416 struct net_device *dev;
417 417
418 /* 418 /*
419 * This is the control buffer. It is free to use for every 419 * This is the control buffer. It is free to use for every
420 * layer. Please put your private variables there. If you 420 * layer. Please put your private variables there. If you
421 * want to keep them across layers you have to do a skb_clone() 421 * want to keep them across layers you have to do a skb_clone()
422 * first. This is owned by whoever has the skb queued ATM. 422 * first. This is owned by whoever has the skb queued ATM.
423 */ 423 */
424 char cb[48] __aligned(8); 424 char cb[48] __aligned(8);
425 425
426 unsigned long _skb_refdst; 426 unsigned long _skb_refdst;
427 #ifdef CONFIG_XFRM 427 #ifdef CONFIG_XFRM
428 struct sec_path *sp; 428 struct sec_path *sp;
429 #endif 429 #endif
430 unsigned int len, 430 unsigned int len,
431 data_len; 431 data_len;
432 __u16 mac_len, 432 __u16 mac_len,
433 hdr_len; 433 hdr_len;
434 union { 434 union {
435 __wsum csum; 435 __wsum csum;
436 struct { 436 struct {
437 __u16 csum_start; 437 __u16 csum_start;
438 __u16 csum_offset; 438 __u16 csum_offset;
439 }; 439 };
440 }; 440 };
441 __u32 priority; 441 __u32 priority;
442 kmemcheck_bitfield_begin(flags1); 442 kmemcheck_bitfield_begin(flags1);
443 __u8 local_df:1, 443 __u8 local_df:1,
444 cloned:1, 444 cloned:1,
445 ip_summed:2, 445 ip_summed:2,
446 nohdr:1, 446 nohdr:1,
447 nfctinfo:3; 447 nfctinfo:3;
448 __u8 pkt_type:3, 448 __u8 pkt_type:3,
449 fclone:2, 449 fclone:2,
450 ipvs_property:1, 450 ipvs_property:1,
451 peeked:1, 451 peeked:1,
452 nf_trace:1; 452 nf_trace:1;
453 kmemcheck_bitfield_end(flags1); 453 kmemcheck_bitfield_end(flags1);
454 __be16 protocol; 454 __be16 protocol;
455 455
456 void (*destructor)(struct sk_buff *skb); 456 void (*destructor)(struct sk_buff *skb);
457 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 457 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
458 struct nf_conntrack *nfct; 458 struct nf_conntrack *nfct;
459 #endif 459 #endif
460 #ifdef CONFIG_BRIDGE_NETFILTER 460 #ifdef CONFIG_BRIDGE_NETFILTER
461 struct nf_bridge_info *nf_bridge; 461 struct nf_bridge_info *nf_bridge;
462 #endif 462 #endif
463 463
464 int skb_iif; 464 int skb_iif;
465 465
466 __u32 rxhash; 466 __u32 rxhash;
467 467
468 __be16 vlan_proto; 468 __be16 vlan_proto;
469 __u16 vlan_tci; 469 __u16 vlan_tci;
470 470
471 #ifdef CONFIG_NET_SCHED 471 #ifdef CONFIG_NET_SCHED
472 __u16 tc_index; /* traffic control index */ 472 __u16 tc_index; /* traffic control index */
473 #ifdef CONFIG_NET_CLS_ACT 473 #ifdef CONFIG_NET_CLS_ACT
474 __u16 tc_verd; /* traffic control verdict */ 474 __u16 tc_verd; /* traffic control verdict */
475 #endif 475 #endif
476 #endif 476 #endif
477 477
478 __u16 queue_mapping; 478 __u16 queue_mapping;
479 kmemcheck_bitfield_begin(flags2); 479 kmemcheck_bitfield_begin(flags2);
480 #ifdef CONFIG_IPV6_NDISC_NODETYPE 480 #ifdef CONFIG_IPV6_NDISC_NODETYPE
481 __u8 ndisc_nodetype:2; 481 __u8 ndisc_nodetype:2;
482 #endif 482 #endif
483 __u8 pfmemalloc:1; 483 __u8 pfmemalloc:1;
484 __u8 ooo_okay:1; 484 __u8 ooo_okay:1;
485 __u8 l4_rxhash:1; 485 __u8 l4_rxhash:1;
486 __u8 wifi_acked_valid:1; 486 __u8 wifi_acked_valid:1;
487 __u8 wifi_acked:1; 487 __u8 wifi_acked:1;
488 __u8 no_fcs:1; 488 __u8 no_fcs:1;
489 __u8 head_frag:1; 489 __u8 head_frag:1;
490 /* Encapsulation protocol and NIC drivers should use 490 /* Encapsulation protocol and NIC drivers should use
491 * this flag to indicate to each other if the skb contains 491 * this flag to indicate to each other if the skb contains
492 * encapsulated packet or not and maybe use the inner packet 492 * encapsulated packet or not and maybe use the inner packet
493 * headers if needed 493 * headers if needed
494 */ 494 */
495 __u8 encapsulation:1; 495 __u8 encapsulation:1;
496 /* 6/8 bit hole (depending on ndisc_nodetype presence) */ 496 /* 6/8 bit hole (depending on ndisc_nodetype presence) */
497 kmemcheck_bitfield_end(flags2); 497 kmemcheck_bitfield_end(flags2);
498 498
499 #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL 499 #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
500 union { 500 union {
501 unsigned int napi_id; 501 unsigned int napi_id;
502 dma_cookie_t dma_cookie; 502 dma_cookie_t dma_cookie;
503 }; 503 };
504 #endif 504 #endif
505 #ifdef CONFIG_NETWORK_SECMARK 505 #ifdef CONFIG_NETWORK_SECMARK
506 __u32 secmark; 506 __u32 secmark;
507 #endif 507 #endif
508 union { 508 union {
509 __u32 mark; 509 __u32 mark;
510 __u32 dropcount; 510 __u32 dropcount;
511 __u32 reserved_tailroom; 511 __u32 reserved_tailroom;
512 }; 512 };
513 513
514 __be16 inner_protocol; 514 __be16 inner_protocol;
515 __u16 inner_transport_header; 515 __u16 inner_transport_header;
516 __u16 inner_network_header; 516 __u16 inner_network_header;
517 __u16 inner_mac_header; 517 __u16 inner_mac_header;
518 __u16 transport_header; 518 __u16 transport_header;
519 __u16 network_header; 519 __u16 network_header;
520 __u16 mac_header; 520 __u16 mac_header;
521 /* These elements must be at the end, see alloc_skb() for details. */ 521 /* These elements must be at the end, see alloc_skb() for details. */
522 sk_buff_data_t tail; 522 sk_buff_data_t tail;
523 sk_buff_data_t end; 523 sk_buff_data_t end;
524 unsigned char *head, 524 unsigned char *head,
525 *data; 525 *data;
526 unsigned int truesize; 526 unsigned int truesize;
527 atomic_t users; 527 atomic_t users;
528 }; 528 };
529 529
530 #ifdef __KERNEL__ 530 #ifdef __KERNEL__
531 /* 531 /*
532 * Handling routines are only of interest to the kernel 532 * Handling routines are only of interest to the kernel
533 */ 533 */
534 #include <linux/slab.h> 534 #include <linux/slab.h>
535 535
536 536
537 #define SKB_ALLOC_FCLONE 0x01 537 #define SKB_ALLOC_FCLONE 0x01
538 #define SKB_ALLOC_RX 0x02 538 #define SKB_ALLOC_RX 0x02
539 539
540 /* Returns true if the skb was allocated from PFMEMALLOC reserves */ 540 /* Returns true if the skb was allocated from PFMEMALLOC reserves */
541 static inline bool skb_pfmemalloc(const struct sk_buff *skb) 541 static inline bool skb_pfmemalloc(const struct sk_buff *skb)
542 { 542 {
543 return unlikely(skb->pfmemalloc); 543 return unlikely(skb->pfmemalloc);
544 } 544 }
545 545
546 /* 546 /*
547 * skb might have a dst pointer attached, refcounted or not. 547 * skb might have a dst pointer attached, refcounted or not.
548 * _skb_refdst low order bit is set if refcount was _not_ taken 548 * _skb_refdst low order bit is set if refcount was _not_ taken
549 */ 549 */
550 #define SKB_DST_NOREF 1UL 550 #define SKB_DST_NOREF 1UL
551 #define SKB_DST_PTRMASK ~(SKB_DST_NOREF) 551 #define SKB_DST_PTRMASK ~(SKB_DST_NOREF)
552 552
553 /** 553 /**
554 * skb_dst - returns skb dst_entry 554 * skb_dst - returns skb dst_entry
555 * @skb: buffer 555 * @skb: buffer
556 * 556 *
557 * Returns skb dst_entry, regardless of reference taken or not. 557 * Returns skb dst_entry, regardless of reference taken or not.
558 */ 558 */
559 static inline struct dst_entry *skb_dst(const struct sk_buff *skb) 559 static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
560 { 560 {
561 /* If refdst was not refcounted, check we still are in a 561 /* If refdst was not refcounted, check we still are in a
562 * rcu_read_lock section 562 * rcu_read_lock section
563 */ 563 */
564 WARN_ON((skb->_skb_refdst & SKB_DST_NOREF) && 564 WARN_ON((skb->_skb_refdst & SKB_DST_NOREF) &&
565 !rcu_read_lock_held() && 565 !rcu_read_lock_held() &&
566 !rcu_read_lock_bh_held()); 566 !rcu_read_lock_bh_held());
567 return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK); 567 return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK);
568 } 568 }
569 569
570 /** 570 /**
571 * skb_dst_set - sets skb dst 571 * skb_dst_set - sets skb dst
572 * @skb: buffer 572 * @skb: buffer
573 * @dst: dst entry 573 * @dst: dst entry
574 * 574 *
575 * Sets skb dst, assuming a reference was taken on dst and should 575 * Sets skb dst, assuming a reference was taken on dst and should
576 * be released by skb_dst_drop() 576 * be released by skb_dst_drop()
577 */ 577 */
578 static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) 578 static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
579 { 579 {
580 skb->_skb_refdst = (unsigned long)dst; 580 skb->_skb_refdst = (unsigned long)dst;
581 } 581 }
582 582
583 void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, 583 void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst,
584 bool force); 584 bool force);
585 585
586 /** 586 /**
587 * skb_dst_set_noref - sets skb dst, hopefully, without taking reference 587 * skb_dst_set_noref - sets skb dst, hopefully, without taking reference
588 * @skb: buffer 588 * @skb: buffer
589 * @dst: dst entry 589 * @dst: dst entry
590 * 590 *
591 * Sets skb dst, assuming a reference was not taken on dst. 591 * Sets skb dst, assuming a reference was not taken on dst.
592 * If dst entry is cached, we do not take reference and dst_release 592 * If dst entry is cached, we do not take reference and dst_release
593 * will be avoided by refdst_drop. If dst entry is not cached, we take 593 * will be avoided by refdst_drop. If dst entry is not cached, we take
594 * reference, so that last dst_release can destroy the dst immediately. 594 * reference, so that last dst_release can destroy the dst immediately.
595 */ 595 */
596 static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) 596 static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
597 { 597 {
598 __skb_dst_set_noref(skb, dst, false); 598 __skb_dst_set_noref(skb, dst, false);
599 } 599 }
600 600
601 /** 601 /**
602 * skb_dst_set_noref_force - sets skb dst, without taking reference 602 * skb_dst_set_noref_force - sets skb dst, without taking reference
603 * @skb: buffer 603 * @skb: buffer
604 * @dst: dst entry 604 * @dst: dst entry
605 * 605 *
606 * Sets skb dst, assuming a reference was not taken on dst. 606 * Sets skb dst, assuming a reference was not taken on dst.
607 * No reference is taken and no dst_release will be called. While for 607 * No reference is taken and no dst_release will be called. While for
608 * cached dsts deferred reclaim is a basic feature, for entries that are 608 * cached dsts deferred reclaim is a basic feature, for entries that are
609 * not cached it is caller's job to guarantee that last dst_release for 609 * not cached it is caller's job to guarantee that last dst_release for
610 * provided dst happens when nobody uses it, eg. after a RCU grace period. 610 * provided dst happens when nobody uses it, eg. after a RCU grace period.
611 */ 611 */
612 static inline void skb_dst_set_noref_force(struct sk_buff *skb, 612 static inline void skb_dst_set_noref_force(struct sk_buff *skb,
613 struct dst_entry *dst) 613 struct dst_entry *dst)
614 { 614 {
615 __skb_dst_set_noref(skb, dst, true); 615 __skb_dst_set_noref(skb, dst, true);
616 } 616 }
617 617
618 /** 618 /**
619 * skb_dst_is_noref - Test if skb dst isn't refcounted 619 * skb_dst_is_noref - Test if skb dst isn't refcounted
620 * @skb: buffer 620 * @skb: buffer
621 */ 621 */
622 static inline bool skb_dst_is_noref(const struct sk_buff *skb) 622 static inline bool skb_dst_is_noref(const struct sk_buff *skb)
623 { 623 {
624 return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb); 624 return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb);
625 } 625 }
626 626
627 static inline struct rtable *skb_rtable(const struct sk_buff *skb) 627 static inline struct rtable *skb_rtable(const struct sk_buff *skb)
628 { 628 {
629 return (struct rtable *)skb_dst(skb); 629 return (struct rtable *)skb_dst(skb);
630 } 630 }
631 631
632 void kfree_skb(struct sk_buff *skb); 632 void kfree_skb(struct sk_buff *skb);
633 void kfree_skb_list(struct sk_buff *segs); 633 void kfree_skb_list(struct sk_buff *segs);
634 void skb_tx_error(struct sk_buff *skb); 634 void skb_tx_error(struct sk_buff *skb);
635 void consume_skb(struct sk_buff *skb); 635 void consume_skb(struct sk_buff *skb);
636 void __kfree_skb(struct sk_buff *skb); 636 void __kfree_skb(struct sk_buff *skb);
637 extern struct kmem_cache *skbuff_head_cache; 637 extern struct kmem_cache *skbuff_head_cache;
638 638
639 void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); 639 void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
640 bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, 640 bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
641 bool *fragstolen, int *delta_truesize); 641 bool *fragstolen, int *delta_truesize);
642 642
643 struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags, 643 struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags,
644 int node); 644 int node);
645 struct sk_buff *build_skb(void *data, unsigned int frag_size); 645 struct sk_buff *build_skb(void *data, unsigned int frag_size);
646 static inline struct sk_buff *alloc_skb(unsigned int size, 646 static inline struct sk_buff *alloc_skb(unsigned int size,
647 gfp_t priority) 647 gfp_t priority)
648 { 648 {
649 return __alloc_skb(size, priority, 0, NUMA_NO_NODE); 649 return __alloc_skb(size, priority, 0, NUMA_NO_NODE);
650 } 650 }
651 651
652 static inline struct sk_buff *alloc_skb_fclone(unsigned int size, 652 static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
653 gfp_t priority) 653 gfp_t priority)
654 { 654 {
655 return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE); 655 return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE);
656 } 656 }
657 657
658 struct sk_buff *__alloc_skb_head(gfp_t priority, int node); 658 struct sk_buff *__alloc_skb_head(gfp_t priority, int node);
659 static inline struct sk_buff *alloc_skb_head(gfp_t priority) 659 static inline struct sk_buff *alloc_skb_head(gfp_t priority)
660 { 660 {
661 return __alloc_skb_head(priority, -1); 661 return __alloc_skb_head(priority, -1);
662 } 662 }
663 663
664 struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); 664 struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
665 int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); 665 int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
666 struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); 666 struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
667 struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority); 667 struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority);
668 struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask); 668 struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask);
669 669
670 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask); 670 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask);
671 struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, 671 struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
672 unsigned int headroom); 672 unsigned int headroom);
673 struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, 673 struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom,
674 int newtailroom, gfp_t priority); 674 int newtailroom, gfp_t priority);
675 int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, 675 int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset,
676 int len); 676 int len);
677 int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer); 677 int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
678 int skb_pad(struct sk_buff *skb, int pad); 678 int skb_pad(struct sk_buff *skb, int pad);
679 #define dev_kfree_skb(a) consume_skb(a) 679 #define dev_kfree_skb(a) consume_skb(a)
680 680
681 int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, 681 int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
682 int getfrag(void *from, char *to, int offset, 682 int getfrag(void *from, char *to, int offset,
683 int len, int odd, struct sk_buff *skb), 683 int len, int odd, struct sk_buff *skb),
684 void *from, int length); 684 void *from, int length);
685 685
686 struct skb_seq_state { 686 struct skb_seq_state {
687 __u32 lower_offset; 687 __u32 lower_offset;
688 __u32 upper_offset; 688 __u32 upper_offset;
689 __u32 frag_idx; 689 __u32 frag_idx;
690 __u32 stepped_offset; 690 __u32 stepped_offset;
691 struct sk_buff *root_skb; 691 struct sk_buff *root_skb;
692 struct sk_buff *cur_skb; 692 struct sk_buff *cur_skb;
693 __u8 *frag_data; 693 __u8 *frag_data;
694 }; 694 };
695 695
696 void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, 696 void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
697 unsigned int to, struct skb_seq_state *st); 697 unsigned int to, struct skb_seq_state *st);
698 unsigned int skb_seq_read(unsigned int consumed, const u8 **data, 698 unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
699 struct skb_seq_state *st); 699 struct skb_seq_state *st);
700 void skb_abort_seq_read(struct skb_seq_state *st); 700 void skb_abort_seq_read(struct skb_seq_state *st);
701 701
702 unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, 702 unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
703 unsigned int to, struct ts_config *config, 703 unsigned int to, struct ts_config *config,
704 struct ts_state *state); 704 struct ts_state *state);
705 705
706 void __skb_get_rxhash(struct sk_buff *skb); 706 void __skb_get_rxhash(struct sk_buff *skb);
707 static inline __u32 skb_get_rxhash(struct sk_buff *skb) 707 static inline __u32 skb_get_rxhash(struct sk_buff *skb)
708 { 708 {
709 if (!skb->l4_rxhash) 709 if (!skb->l4_rxhash)
710 __skb_get_rxhash(skb); 710 __skb_get_rxhash(skb);
711 711
712 return skb->rxhash; 712 return skb->rxhash;
713 } 713 }
714 714
715 #ifdef NET_SKBUFF_DATA_USES_OFFSET 715 #ifdef NET_SKBUFF_DATA_USES_OFFSET
716 static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) 716 static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
717 { 717 {
718 return skb->head + skb->end; 718 return skb->head + skb->end;
719 } 719 }
720 720
721 static inline unsigned int skb_end_offset(const struct sk_buff *skb) 721 static inline unsigned int skb_end_offset(const struct sk_buff *skb)
722 { 722 {
723 return skb->end; 723 return skb->end;
724 } 724 }
725 #else 725 #else
726 static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) 726 static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
727 { 727 {
728 return skb->end; 728 return skb->end;
729 } 729 }
730 730
731 static inline unsigned int skb_end_offset(const struct sk_buff *skb) 731 static inline unsigned int skb_end_offset(const struct sk_buff *skb)
732 { 732 {
733 return skb->end - skb->head; 733 return skb->end - skb->head;
734 } 734 }
735 #endif 735 #endif
736 736
737 /* Internal */ 737 /* Internal */
738 #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) 738 #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB)))
739 739
740 static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) 740 static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb)
741 { 741 {
742 return &skb_shinfo(skb)->hwtstamps; 742 return &skb_shinfo(skb)->hwtstamps;
743 } 743 }
744 744
745 /** 745 /**
746 * skb_queue_empty - check if a queue is empty 746 * skb_queue_empty - check if a queue is empty
747 * @list: queue head 747 * @list: queue head
748 * 748 *
749 * Returns true if the queue is empty, false otherwise. 749 * Returns true if the queue is empty, false otherwise.
750 */ 750 */
751 static inline int skb_queue_empty(const struct sk_buff_head *list) 751 static inline int skb_queue_empty(const struct sk_buff_head *list)
752 { 752 {
753 return list->next == (struct sk_buff *)list; 753 return list->next == (struct sk_buff *)list;
754 } 754 }
755 755
756 /** 756 /**
757 * skb_queue_is_last - check if skb is the last entry in the queue 757 * skb_queue_is_last - check if skb is the last entry in the queue
758 * @list: queue head 758 * @list: queue head
759 * @skb: buffer 759 * @skb: buffer
760 * 760 *
761 * Returns true if @skb is the last buffer on the list. 761 * Returns true if @skb is the last buffer on the list.
762 */ 762 */
763 static inline bool skb_queue_is_last(const struct sk_buff_head *list, 763 static inline bool skb_queue_is_last(const struct sk_buff_head *list,
764 const struct sk_buff *skb) 764 const struct sk_buff *skb)
765 { 765 {
766 return skb->next == (struct sk_buff *)list; 766 return skb->next == (struct sk_buff *)list;
767 } 767 }
768 768
769 /** 769 /**
770 * skb_queue_is_first - check if skb is the first entry in the queue 770 * skb_queue_is_first - check if skb is the first entry in the queue
771 * @list: queue head 771 * @list: queue head
772 * @skb: buffer 772 * @skb: buffer
773 * 773 *
774 * Returns true if @skb is the first buffer on the list. 774 * Returns true if @skb is the first buffer on the list.
775 */ 775 */
776 static inline bool skb_queue_is_first(const struct sk_buff_head *list, 776 static inline bool skb_queue_is_first(const struct sk_buff_head *list,
777 const struct sk_buff *skb) 777 const struct sk_buff *skb)
778 { 778 {
779 return skb->prev == (struct sk_buff *)list; 779 return skb->prev == (struct sk_buff *)list;
780 } 780 }
781 781
782 /** 782 /**
783 * skb_queue_next - return the next packet in the queue 783 * skb_queue_next - return the next packet in the queue
784 * @list: queue head 784 * @list: queue head
785 * @skb: current buffer 785 * @skb: current buffer
786 * 786 *
787 * Return the next packet in @list after @skb. It is only valid to 787 * Return the next packet in @list after @skb. It is only valid to
788 * call this if skb_queue_is_last() evaluates to false. 788 * call this if skb_queue_is_last() evaluates to false.
789 */ 789 */
790 static inline struct sk_buff *skb_queue_next(const struct sk_buff_head *list, 790 static inline struct sk_buff *skb_queue_next(const struct sk_buff_head *list,
791 const struct sk_buff *skb) 791 const struct sk_buff *skb)
792 { 792 {
793 /* This BUG_ON may seem severe, but if we just return then we 793 /* This BUG_ON may seem severe, but if we just return then we
794 * are going to dereference garbage. 794 * are going to dereference garbage.
795 */ 795 */
796 BUG_ON(skb_queue_is_last(list, skb)); 796 BUG_ON(skb_queue_is_last(list, skb));
797 return skb->next; 797 return skb->next;
798 } 798 }
799 799
800 /** 800 /**
801 * skb_queue_prev - return the prev packet in the queue 801 * skb_queue_prev - return the prev packet in the queue
802 * @list: queue head 802 * @list: queue head
803 * @skb: current buffer 803 * @skb: current buffer
804 * 804 *
805 * Return the prev packet in @list before @skb. It is only valid to 805 * Return the prev packet in @list before @skb. It is only valid to
806 * call this if skb_queue_is_first() evaluates to false. 806 * call this if skb_queue_is_first() evaluates to false.
807 */ 807 */
808 static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list, 808 static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list,
809 const struct sk_buff *skb) 809 const struct sk_buff *skb)
810 { 810 {
811 /* This BUG_ON may seem severe, but if we just return then we 811 /* This BUG_ON may seem severe, but if we just return then we
812 * are going to dereference garbage. 812 * are going to dereference garbage.
813 */ 813 */
814 BUG_ON(skb_queue_is_first(list, skb)); 814 BUG_ON(skb_queue_is_first(list, skb));
815 return skb->prev; 815 return skb->prev;
816 } 816 }
817 817
818 /** 818 /**
819 * skb_get - reference buffer 819 * skb_get - reference buffer
820 * @skb: buffer to reference 820 * @skb: buffer to reference
821 * 821 *
822 * Makes another reference to a socket buffer and returns a pointer 822 * Makes another reference to a socket buffer and returns a pointer
823 * to the buffer. 823 * to the buffer.
824 */ 824 */
825 static inline struct sk_buff *skb_get(struct sk_buff *skb) 825 static inline struct sk_buff *skb_get(struct sk_buff *skb)
826 { 826 {
827 atomic_inc(&skb->users); 827 atomic_inc(&skb->users);
828 return skb; 828 return skb;
829 } 829 }
830 830
831 /* 831 /*
832 * If users == 1, we are the only owner and are can avoid redundant 832 * If users == 1, we are the only owner and are can avoid redundant
833 * atomic change. 833 * atomic change.
834 */ 834 */
835 835
836 /** 836 /**
837 * skb_cloned - is the buffer a clone 837 * skb_cloned - is the buffer a clone
838 * @skb: buffer to check 838 * @skb: buffer to check
839 * 839 *
840 * Returns true if the buffer was generated with skb_clone() and is 840 * Returns true if the buffer was generated with skb_clone() and is
841 * one of multiple shared copies of the buffer. Cloned buffers are 841 * one of multiple shared copies of the buffer. Cloned buffers are
842 * shared data so must not be written to under normal circumstances. 842 * shared data so must not be written to under normal circumstances.
843 */ 843 */
844 static inline int skb_cloned(const struct sk_buff *skb) 844 static inline int skb_cloned(const struct sk_buff *skb)
845 { 845 {
846 return skb->cloned && 846 return skb->cloned &&
847 (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1; 847 (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1;
848 } 848 }
849 849
850 static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) 850 static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
851 { 851 {
852 might_sleep_if(pri & __GFP_WAIT); 852 might_sleep_if(pri & __GFP_WAIT);
853 853
854 if (skb_cloned(skb)) 854 if (skb_cloned(skb))
855 return pskb_expand_head(skb, 0, 0, pri); 855 return pskb_expand_head(skb, 0, 0, pri);
856 856
857 return 0; 857 return 0;
858 } 858 }
859 859
860 /** 860 /**
861 * skb_header_cloned - is the header a clone 861 * skb_header_cloned - is the header a clone
862 * @skb: buffer to check 862 * @skb: buffer to check
863 * 863 *
864 * Returns true if modifying the header part of the buffer requires 864 * Returns true if modifying the header part of the buffer requires
865 * the data to be copied. 865 * the data to be copied.
866 */ 866 */
867 static inline int skb_header_cloned(const struct sk_buff *skb) 867 static inline int skb_header_cloned(const struct sk_buff *skb)
868 { 868 {
869 int dataref; 869 int dataref;
870 870
871 if (!skb->cloned) 871 if (!skb->cloned)
872 return 0; 872 return 0;
873 873
874 dataref = atomic_read(&skb_shinfo(skb)->dataref); 874 dataref = atomic_read(&skb_shinfo(skb)->dataref);
875 dataref = (dataref & SKB_DATAREF_MASK) - (dataref >> SKB_DATAREF_SHIFT); 875 dataref = (dataref & SKB_DATAREF_MASK) - (dataref >> SKB_DATAREF_SHIFT);
876 return dataref != 1; 876 return dataref != 1;
877 } 877 }
878 878
879 /** 879 /**
880 * skb_header_release - release reference to header 880 * skb_header_release - release reference to header
881 * @skb: buffer to operate on 881 * @skb: buffer to operate on
882 * 882 *
883 * Drop a reference to the header part of the buffer. This is done 883 * Drop a reference to the header part of the buffer. This is done
884 * by acquiring a payload reference. You must not read from the header 884 * by acquiring a payload reference. You must not read from the header
885 * part of skb->data after this. 885 * part of skb->data after this.
886 */ 886 */
887 static inline void skb_header_release(struct sk_buff *skb) 887 static inline void skb_header_release(struct sk_buff *skb)
888 { 888 {
889 BUG_ON(skb->nohdr); 889 BUG_ON(skb->nohdr);
890 skb->nohdr = 1; 890 skb->nohdr = 1;
891 atomic_add(1 << SKB_DATAREF_SHIFT, &skb_shinfo(skb)->dataref); 891 atomic_add(1 << SKB_DATAREF_SHIFT, &skb_shinfo(skb)->dataref);
892 } 892 }
893 893
894 /** 894 /**
895 * skb_shared - is the buffer shared 895 * skb_shared - is the buffer shared
896 * @skb: buffer to check 896 * @skb: buffer to check
897 * 897 *
898 * Returns true if more than one person has a reference to this 898 * Returns true if more than one person has a reference to this
899 * buffer. 899 * buffer.
900 */ 900 */
901 static inline int skb_shared(const struct sk_buff *skb) 901 static inline int skb_shared(const struct sk_buff *skb)
902 { 902 {
903 return atomic_read(&skb->users) != 1; 903 return atomic_read(&skb->users) != 1;
904 } 904 }
905 905
906 /** 906 /**
907 * skb_share_check - check if buffer is shared and if so clone it 907 * skb_share_check - check if buffer is shared and if so clone it
908 * @skb: buffer to check 908 * @skb: buffer to check
909 * @pri: priority for memory allocation 909 * @pri: priority for memory allocation
910 * 910 *
911 * If the buffer is shared the buffer is cloned and the old copy 911 * If the buffer is shared the buffer is cloned and the old copy
912 * drops a reference. A new clone with a single reference is returned. 912 * drops a reference. A new clone with a single reference is returned.
913 * If the buffer is not shared the original buffer is returned. When 913 * If the buffer is not shared the original buffer is returned. When
914 * called from interrupt state or with spinlocks held, @pri must 914 * called from interrupt state or with spinlocks held, @pri must
915 * be %GFP_ATOMIC. 915 * be %GFP_ATOMIC.
916 * 916 *
917 * NULL is returned on a memory allocation failure. 917 * NULL is returned on a memory allocation failure.
918 */ 918 */
919 static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) 919 static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri)
920 { 920 {
921 might_sleep_if(pri & __GFP_WAIT); 921 might_sleep_if(pri & __GFP_WAIT);
922 if (skb_shared(skb)) { 922 if (skb_shared(skb)) {
923 struct sk_buff *nskb = skb_clone(skb, pri); 923 struct sk_buff *nskb = skb_clone(skb, pri);
924 924
925 if (likely(nskb)) 925 if (likely(nskb))
926 consume_skb(skb); 926 consume_skb(skb);
927 else 927 else
928 kfree_skb(skb); 928 kfree_skb(skb);
929 skb = nskb; 929 skb = nskb;
930 } 930 }
931 return skb; 931 return skb;
932 } 932 }
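For illustration (handler and device names are hypothetical), a receive handler that wants to modify the skb usually starts with skb_share_check() exactly as described above:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static int example_rcv(struct sk_buff *skb, struct net_device *dev)
{
	/* Get a private clone if somebody else also holds a reference. */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (!skb)
		return NET_RX_DROP;	/* clone failed, original already freed */

	/* From here on it is safe to modify skb metadata. */
	return NET_RX_SUCCESS;
}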
933 933
934 /* 934 /*
935 * Copy shared buffers into a new sk_buff. We effectively do COW on 935 * Copy shared buffers into a new sk_buff. We effectively do COW on
936 * packets to handle cases where we have a local reader and forward 936 * packets to handle cases where we have a local reader and forward
937 * and a couple of other messy ones. The normal one is tcpdumping 937 * and a couple of other messy ones. The normal one is tcpdumping
938 * a packet that's being forwarded. 938 * a packet that's being forwarded.
939 */ 939 */
940 940
941 /** 941 /**
942 * skb_unshare - make a copy of a shared buffer 942 * skb_unshare - make a copy of a shared buffer
943 * @skb: buffer to check 943 * @skb: buffer to check
944 * @pri: priority for memory allocation 944 * @pri: priority for memory allocation
945 * 945 *
946 * If the socket buffer is a clone then this function creates a new 946 * If the socket buffer is a clone then this function creates a new
947 * copy of the data, drops a reference count on the old copy and returns 947 * copy of the data, drops a reference count on the old copy and returns
948 * the new copy with the reference count at 1. If the buffer is not a clone 948 * the new copy with the reference count at 1. If the buffer is not a clone
949 * the original buffer is returned. When called with a spinlock held or 949 * the original buffer is returned. When called with a spinlock held or
950 * from interrupt state @pri must be %GFP_ATOMIC 950 * from interrupt state @pri must be %GFP_ATOMIC
951 * 951 *
952 * %NULL is returned on a memory allocation failure. 952 * %NULL is returned on a memory allocation failure.
953 */ 953 */
954 static inline struct sk_buff *skb_unshare(struct sk_buff *skb, 954 static inline struct sk_buff *skb_unshare(struct sk_buff *skb,
955 gfp_t pri) 955 gfp_t pri)
956 { 956 {
957 might_sleep_if(pri & __GFP_WAIT); 957 might_sleep_if(pri & __GFP_WAIT);
958 if (skb_cloned(skb)) { 958 if (skb_cloned(skb)) {
959 struct sk_buff *nskb = skb_copy(skb, pri); 959 struct sk_buff *nskb = skb_copy(skb, pri);
960 kfree_skb(skb); /* Free our shared copy */ 960 kfree_skb(skb); /* Free our shared copy */
961 skb = nskb; 961 skb = nskb;
962 } 962 }
963 return skb; 963 return skb;
964 } 964 }
965 965
966 /** 966 /**
967 * skb_peek - peek at the head of an &sk_buff_head 967 * skb_peek - peek at the head of an &sk_buff_head
968 * @list_: list to peek at 968 * @list_: list to peek at
969 * 969 *
970 * Peek an &sk_buff. Unlike most other operations you _MUST_ 970 * Peek an &sk_buff. Unlike most other operations you _MUST_
971 * be careful with this one. A peek leaves the buffer on the 971 * be careful with this one. A peek leaves the buffer on the
972 * list and someone else may run off with it. You must hold 972 * list and someone else may run off with it. You must hold
973 * the appropriate locks or have a private queue to do this. 973 * the appropriate locks or have a private queue to do this.
974 * 974 *
975 * Returns %NULL for an empty list or a pointer to the head element. 975 * Returns %NULL for an empty list or a pointer to the head element.
976 * The reference count is not incremented and the reference is therefore 976 * The reference count is not incremented and the reference is therefore
977 * volatile. Use with caution. 977 * volatile. Use with caution.
978 */ 978 */
979 static inline struct sk_buff *skb_peek(const struct sk_buff_head *list_) 979 static inline struct sk_buff *skb_peek(const struct sk_buff_head *list_)
980 { 980 {
981 struct sk_buff *skb = list_->next; 981 struct sk_buff *skb = list_->next;
982 982
983 if (skb == (struct sk_buff *)list_) 983 if (skb == (struct sk_buff *)list_)
984 skb = NULL; 984 skb = NULL;
985 return skb; 985 return skb;
986 } 986 }
987 987
988 /** 988 /**
989 * skb_peek_next - peek skb following the given one from a queue 989 * skb_peek_next - peek skb following the given one from a queue
990 * @skb: skb to start from 990 * @skb: skb to start from
991 * @list_: list to peek at 991 * @list_: list to peek at
992 * 992 *
993 * Returns %NULL when the end of the list is met or a pointer to the 993 * Returns %NULL when the end of the list is met or a pointer to the
994 * next element. The reference count is not incremented and the 994 * next element. The reference count is not incremented and the
995 * reference is therefore volatile. Use with caution. 995 * reference is therefore volatile. Use with caution.
996 */ 996 */
997 static inline struct sk_buff *skb_peek_next(struct sk_buff *skb, 997 static inline struct sk_buff *skb_peek_next(struct sk_buff *skb,
998 const struct sk_buff_head *list_) 998 const struct sk_buff_head *list_)
999 { 999 {
1000 struct sk_buff *next = skb->next; 1000 struct sk_buff *next = skb->next;
1001 1001
1002 if (next == (struct sk_buff *)list_) 1002 if (next == (struct sk_buff *)list_)
1003 next = NULL; 1003 next = NULL;
1004 return next; 1004 return next;
1005 } 1005 }
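A sketch of the locking the peek helpers require, assuming the queue is protected by its own sk_buff_head lock (the function is illustrative; skb_queue_len() below would of course give the count directly):

#include <linux/skbuff.h>
#include <linux/spinlock.h>

static unsigned int example_walk_queue(struct sk_buff_head *q)
{
	struct sk_buff *skb;
	unsigned long flags;
	unsigned int n = 0;

	spin_lock_irqsave(&q->lock, flags);
	for (skb = skb_peek(q); skb; skb = skb_peek_next(skb, q))
		n++;			/* buffers stay on the list */
	spin_unlock_irqrestore(&q->lock, flags);

	return n;
}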
1006 1006
1007 /** 1007 /**
1008 * skb_peek_tail - peek at the tail of an &sk_buff_head 1008 * skb_peek_tail - peek at the tail of an &sk_buff_head
1009 * @list_: list to peek at 1009 * @list_: list to peek at
1010 * 1010 *
1011 * Peek an &sk_buff. Unlike most other operations you _MUST_ 1011 * Peek an &sk_buff. Unlike most other operations you _MUST_
1012 * be careful with this one. A peek leaves the buffer on the 1012 * be careful with this one. A peek leaves the buffer on the
1013 * list and someone else may run off with it. You must hold 1013 * list and someone else may run off with it. You must hold
1014 * the appropriate locks or have a private queue to do this. 1014 * the appropriate locks or have a private queue to do this.
1015 * 1015 *
1016 * Returns %NULL for an empty list or a pointer to the tail element. 1016 * Returns %NULL for an empty list or a pointer to the tail element.
1017 * The reference count is not incremented and the reference is therefore 1017 * The reference count is not incremented and the reference is therefore
1018 * volatile. Use with caution. 1018 * volatile. Use with caution.
1019 */ 1019 */
1020 static inline struct sk_buff *skb_peek_tail(const struct sk_buff_head *list_) 1020 static inline struct sk_buff *skb_peek_tail(const struct sk_buff_head *list_)
1021 { 1021 {
1022 struct sk_buff *skb = list_->prev; 1022 struct sk_buff *skb = list_->prev;
1023 1023
1024 if (skb == (struct sk_buff *)list_) 1024 if (skb == (struct sk_buff *)list_)
1025 skb = NULL; 1025 skb = NULL;
1026 return skb; 1026 return skb;
1027 1027
1028 } 1028 }
1029 1029
1030 /** 1030 /**
1031 * skb_queue_len - get queue length 1031 * skb_queue_len - get queue length
1032 * @list_: list to measure 1032 * @list_: list to measure
1033 * 1033 *
1034 * Return the length of an &sk_buff queue. 1034 * Return the length of an &sk_buff queue.
1035 */ 1035 */
1036 static inline __u32 skb_queue_len(const struct sk_buff_head *list_) 1036 static inline __u32 skb_queue_len(const struct sk_buff_head *list_)
1037 { 1037 {
1038 return list_->qlen; 1038 return list_->qlen;
1039 } 1039 }
1040 1040
1041 /** 1041 /**
1042 * __skb_queue_head_init - initialize non-spinlock portions of sk_buff_head 1042 * __skb_queue_head_init - initialize non-spinlock portions of sk_buff_head
1043 * @list: queue to initialize 1043 * @list: queue to initialize
1044 * 1044 *
1045 * This initializes only the list and queue length aspects of 1045 * This initializes only the list and queue length aspects of
1046 * an sk_buff_head object. This allows initializing the list 1046 * an sk_buff_head object. This allows initializing the list
1047 * aspects of an sk_buff_head without reinitializing things like 1047 * aspects of an sk_buff_head without reinitializing things like
1048 * the spinlock. It can also be used for on-stack sk_buff_head 1048 * the spinlock. It can also be used for on-stack sk_buff_head
1049 * objects where the spinlock is known to not be used. 1049 * objects where the spinlock is known to not be used.
1050 */ 1050 */
1051 static inline void __skb_queue_head_init(struct sk_buff_head *list) 1051 static inline void __skb_queue_head_init(struct sk_buff_head *list)
1052 { 1052 {
1053 list->prev = list->next = (struct sk_buff *)list; 1053 list->prev = list->next = (struct sk_buff *)list;
1054 list->qlen = 0; 1054 list->qlen = 0;
1055 } 1055 }
1056 1056
1057 /* 1057 /*
1058 * This function creates a split out lock class for each invocation; 1058 * This function creates a split out lock class for each invocation;
1059 * this is needed for now since a whole lot of users of the skb-queue 1059 * this is needed for now since a whole lot of users of the skb-queue
1060 * infrastructure in drivers have different locking usage (in hardirq) 1060 * infrastructure in drivers have different locking usage (in hardirq)
1061 * than the networking core (in softirq only). In the long run either the 1061 * than the networking core (in softirq only). In the long run either the
1062 * network layer or drivers should need annotation to consolidate the 1062 * network layer or drivers should need annotation to consolidate the
1063 * main types of usage into 3 classes. 1063 * main types of usage into 3 classes.
1064 */ 1064 */
1065 static inline void skb_queue_head_init(struct sk_buff_head *list) 1065 static inline void skb_queue_head_init(struct sk_buff_head *list)
1066 { 1066 {
1067 spin_lock_init(&list->lock); 1067 spin_lock_init(&list->lock);
1068 __skb_queue_head_init(list); 1068 __skb_queue_head_init(list);
1069 } 1069 }
1070 1070
1071 static inline void skb_queue_head_init_class(struct sk_buff_head *list, 1071 static inline void skb_queue_head_init_class(struct sk_buff_head *list,
1072 struct lock_class_key *class) 1072 struct lock_class_key *class)
1073 { 1073 {
1074 skb_queue_head_init(list); 1074 skb_queue_head_init(list);
1075 lockdep_set_class(&list->lock, class); 1075 lockdep_set_class(&list->lock, class);
1076 } 1076 }
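As an aside, the usual split between the two initializers looks like this (structure and function names are made up): the locked variant for a queue embedded in a long-lived object, the double-underscore variant for a short-lived private list where the spinlock is never taken:

#include <linux/skbuff.h>

struct example_priv {
	struct sk_buff_head rxq;		/* hypothetical driver field */
};

static void example_init(struct example_priv *priv)
{
	skb_queue_head_init(&priv->rxq);	/* initializes lock and list */
}

static void example_local_batch(void)
{
	struct sk_buff_head batch;

	__skb_queue_head_init(&batch);		/* lock intentionally unused */
	/* ... fill and drain batch without taking any queue lock ... */
}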
1077 1077
1078 /* 1078 /*
1079 * Insert an sk_buff on a list. 1079 * Insert an sk_buff on a list.
1080 * 1080 *
1081 * The "__skb_xxxx()" functions are the non-atomic ones that 1081 * The "__skb_xxxx()" functions are the non-atomic ones that
1082 * can only be called with interrupts disabled. 1082 * can only be called with interrupts disabled.
1083 */ 1083 */
1084 void skb_insert(struct sk_buff *old, struct sk_buff *newsk, 1084 void skb_insert(struct sk_buff *old, struct sk_buff *newsk,
1085 struct sk_buff_head *list); 1085 struct sk_buff_head *list);
1086 static inline void __skb_insert(struct sk_buff *newsk, 1086 static inline void __skb_insert(struct sk_buff *newsk,
1087 struct sk_buff *prev, struct sk_buff *next, 1087 struct sk_buff *prev, struct sk_buff *next,
1088 struct sk_buff_head *list) 1088 struct sk_buff_head *list)
1089 { 1089 {
1090 newsk->next = next; 1090 newsk->next = next;
1091 newsk->prev = prev; 1091 newsk->prev = prev;
1092 next->prev = prev->next = newsk; 1092 next->prev = prev->next = newsk;
1093 list->qlen++; 1093 list->qlen++;
1094 } 1094 }
1095 1095
1096 static inline void __skb_queue_splice(const struct sk_buff_head *list, 1096 static inline void __skb_queue_splice(const struct sk_buff_head *list,
1097 struct sk_buff *prev, 1097 struct sk_buff *prev,
1098 struct sk_buff *next) 1098 struct sk_buff *next)
1099 { 1099 {
1100 struct sk_buff *first = list->next; 1100 struct sk_buff *first = list->next;
1101 struct sk_buff *last = list->prev; 1101 struct sk_buff *last = list->prev;
1102 1102
1103 first->prev = prev; 1103 first->prev = prev;
1104 prev->next = first; 1104 prev->next = first;
1105 1105
1106 last->next = next; 1106 last->next = next;
1107 next->prev = last; 1107 next->prev = last;
1108 } 1108 }
1109 1109
1110 /** 1110 /**
1111 * skb_queue_splice - join two skb lists; this is designed for stacks 1111 * skb_queue_splice - join two skb lists; this is designed for stacks
1112 * @list: the new list to add 1112 * @list: the new list to add
1113 * @head: the place to add it in the first list 1113 * @head: the place to add it in the first list
1114 */ 1114 */
1115 static inline void skb_queue_splice(const struct sk_buff_head *list, 1115 static inline void skb_queue_splice(const struct sk_buff_head *list,
1116 struct sk_buff_head *head) 1116 struct sk_buff_head *head)
1117 { 1117 {
1118 if (!skb_queue_empty(list)) { 1118 if (!skb_queue_empty(list)) {
1119 __skb_queue_splice(list, (struct sk_buff *) head, head->next); 1119 __skb_queue_splice(list, (struct sk_buff *) head, head->next);
1120 head->qlen += list->qlen; 1120 head->qlen += list->qlen;
1121 } 1121 }
1122 } 1122 }
1123 1123
1124 /** 1124 /**
1125 * skb_queue_splice_init - join two skb lists and reinitialise the emptied list 1125 * skb_queue_splice_init - join two skb lists and reinitialise the emptied list
1126 * @list: the new list to add 1126 * @list: the new list to add
1127 * @head: the place to add it in the first list 1127 * @head: the place to add it in the first list
1128 * 1128 *
1129 * The list at @list is reinitialised 1129 * The list at @list is reinitialised
1130 */ 1130 */
1131 static inline void skb_queue_splice_init(struct sk_buff_head *list, 1131 static inline void skb_queue_splice_init(struct sk_buff_head *list,
1132 struct sk_buff_head *head) 1132 struct sk_buff_head *head)
1133 { 1133 {
1134 if (!skb_queue_empty(list)) { 1134 if (!skb_queue_empty(list)) {
1135 __skb_queue_splice(list, (struct sk_buff *) head, head->next); 1135 __skb_queue_splice(list, (struct sk_buff *) head, head->next);
1136 head->qlen += list->qlen; 1136 head->qlen += list->qlen;
1137 __skb_queue_head_init(list); 1137 __skb_queue_head_init(list);
1138 } 1138 }
1139 } 1139 }
1140 1140
1141 /** 1141 /**
1142 * skb_queue_splice_tail - join two skb lists, each list being a queue 1142 * skb_queue_splice_tail - join two skb lists, each list being a queue
1143 * @list: the new list to add 1143 * @list: the new list to add
1144 * @head: the place to add it in the first list 1144 * @head: the place to add it in the first list
1145 */ 1145 */
1146 static inline void skb_queue_splice_tail(const struct sk_buff_head *list, 1146 static inline void skb_queue_splice_tail(const struct sk_buff_head *list,
1147 struct sk_buff_head *head) 1147 struct sk_buff_head *head)
1148 { 1148 {
1149 if (!skb_queue_empty(list)) { 1149 if (!skb_queue_empty(list)) {
1150 __skb_queue_splice(list, head->prev, (struct sk_buff *) head); 1150 __skb_queue_splice(list, head->prev, (struct sk_buff *) head);
1151 head->qlen += list->qlen; 1151 head->qlen += list->qlen;
1152 } 1152 }
1153 } 1153 }
1154 1154
1155 /** 1155 /**
1156 * skb_queue_splice_tail_init - join two skb lists and reinitialise the emptied list 1156 * skb_queue_splice_tail_init - join two skb lists and reinitialise the emptied list
1157 * @list: the new list to add 1157 * @list: the new list to add
1158 * @head: the place to add it in the first list 1158 * @head: the place to add it in the first list
1159 * 1159 *
1160 * Each of the lists is a queue. 1160 * Each of the lists is a queue.
1161 * The list at @list is reinitialised 1161 * The list at @list is reinitialised
1162 */ 1162 */
1163 static inline void skb_queue_splice_tail_init(struct sk_buff_head *list, 1163 static inline void skb_queue_splice_tail_init(struct sk_buff_head *list,
1164 struct sk_buff_head *head) 1164 struct sk_buff_head *head)
1165 { 1165 {
1166 if (!skb_queue_empty(list)) { 1166 if (!skb_queue_empty(list)) {
1167 __skb_queue_splice(list, head->prev, (struct sk_buff *) head); 1167 __skb_queue_splice(list, head->prev, (struct sk_buff *) head);
1168 head->qlen += list->qlen; 1168 head->qlen += list->qlen;
1169 __skb_queue_head_init(list); 1169 __skb_queue_head_init(list);
1170 } 1170 }
1171 } 1171 }
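A common pattern these splice helpers enable, shown purely as a sketch with hypothetical names: move the whole shared queue onto a private on-stack list in one lock hold, then work on the buffers with the lock dropped:

#include <linux/skbuff.h>
#include <linux/spinlock.h>

static void example_drain(struct sk_buff_head *shared_q)
{
	struct sk_buff_head local;
	struct sk_buff *skb;
	unsigned long flags;

	__skb_queue_head_init(&local);

	spin_lock_irqsave(&shared_q->lock, flags);
	skb_queue_splice_tail_init(shared_q, &local);	/* shared_q is now empty */
	spin_unlock_irqrestore(&shared_q->lock, flags);

	while ((skb = __skb_dequeue(&local)) != NULL)
		kfree_skb(skb);		/* or hand each skb on for processing */
}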
1172 1172
1173 /** 1173 /**
1174 * __skb_queue_after - queue a buffer after the given buffer 1174 * __skb_queue_after - queue a buffer after the given buffer
1175 * @list: list to use 1175 * @list: list to use
1176 * @prev: place after this buffer 1176 * @prev: place after this buffer
1177 * @newsk: buffer to queue 1177 * @newsk: buffer to queue
1178 * 1178 *
1179 * Queue a buffer in the middle of a list. This function takes no locks 1179 * Queue a buffer in the middle of a list. This function takes no locks
1180 * and you must therefore hold required locks before calling it. 1180 * and you must therefore hold required locks before calling it.
1181 * 1181 *
1182 * A buffer cannot be placed on two lists at the same time. 1182 * A buffer cannot be placed on two lists at the same time.
1183 */ 1183 */
1184 static inline void __skb_queue_after(struct sk_buff_head *list, 1184 static inline void __skb_queue_after(struct sk_buff_head *list,
1185 struct sk_buff *prev, 1185 struct sk_buff *prev,
1186 struct sk_buff *newsk) 1186 struct sk_buff *newsk)
1187 { 1187 {
1188 __skb_insert(newsk, prev, prev->next, list); 1188 __skb_insert(newsk, prev, prev->next, list);
1189 } 1189 }
1190 1190
1191 void skb_append(struct sk_buff *old, struct sk_buff *newsk, 1191 void skb_append(struct sk_buff *old, struct sk_buff *newsk,
1192 struct sk_buff_head *list); 1192 struct sk_buff_head *list);
1193 1193
1194 static inline void __skb_queue_before(struct sk_buff_head *list, 1194 static inline void __skb_queue_before(struct sk_buff_head *list,
1195 struct sk_buff *next, 1195 struct sk_buff *next,
1196 struct sk_buff *newsk) 1196 struct sk_buff *newsk)
1197 { 1197 {
1198 __skb_insert(newsk, next->prev, next, list); 1198 __skb_insert(newsk, next->prev, next, list);
1199 } 1199 }
1200 1200
1201 /** 1201 /**
1202 * __skb_queue_head - queue a buffer at the list head 1202 * __skb_queue_head - queue a buffer at the list head
1203 * @list: list to use 1203 * @list: list to use
1204 * @newsk: buffer to queue 1204 * @newsk: buffer to queue
1205 * 1205 *
1206 * Queue a buffer at the start of a list. This function takes no locks 1206 * Queue a buffer at the start of a list. This function takes no locks
1207 * and you must therefore hold required locks before calling it. 1207 * and you must therefore hold required locks before calling it.
1208 * 1208 *
1209 * A buffer cannot be placed on two lists at the same time. 1209 * A buffer cannot be placed on two lists at the same time.
1210 */ 1210 */
1211 void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); 1211 void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
1212 static inline void __skb_queue_head(struct sk_buff_head *list, 1212 static inline void __skb_queue_head(struct sk_buff_head *list,
1213 struct sk_buff *newsk) 1213 struct sk_buff *newsk)
1214 { 1214 {
1215 __skb_queue_after(list, (struct sk_buff *)list, newsk); 1215 __skb_queue_after(list, (struct sk_buff *)list, newsk);
1216 } 1216 }
1217 1217
1218 /** 1218 /**
1219 * __skb_queue_tail - queue a buffer at the list tail 1219 * __skb_queue_tail - queue a buffer at the list tail
1220 * @list: list to use 1220 * @list: list to use
1221 * @newsk: buffer to queue 1221 * @newsk: buffer to queue
1222 * 1222 *
1223 * Queue a buffer at the end of a list. This function takes no locks 1223 * Queue a buffer at the end of a list. This function takes no locks
1224 * and you must therefore hold required locks before calling it. 1224 * and you must therefore hold required locks before calling it.
1225 * 1225 *
1226 * A buffer cannot be placed on two lists at the same time. 1226 * A buffer cannot be placed on two lists at the same time.
1227 */ 1227 */
1228 void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); 1228 void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk);
1229 static inline void __skb_queue_tail(struct sk_buff_head *list, 1229 static inline void __skb_queue_tail(struct sk_buff_head *list,
1230 struct sk_buff *newsk) 1230 struct sk_buff *newsk)
1231 { 1231 {
1232 __skb_queue_before(list, (struct sk_buff *)list, newsk); 1232 __skb_queue_before(list, (struct sk_buff *)list, newsk);
1233 } 1233 }
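Illustrative only: the unlocked __skb_queue_tail()/__skb_queue_head() variants are meant for callers that already hold the queue lock or own the queue outright, e.g.:

#include <linux/skbuff.h>
#include <linux/spinlock.h>

static void example_enqueue(struct sk_buff_head *q, struct sk_buff *skb)
{
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	__skb_queue_tail(q, skb);	/* qlen is updated by __skb_insert() */
	spin_unlock_irqrestore(&q->lock, flags);
}

The locked skb_queue_tail() wrapper declared above does exactly this; open-coding it is only worthwhile when several queue operations must happen under a single lock hold.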
1234 1234
1235 /* 1235 /*
1236 * remove sk_buff from list. _Must_ be called atomically, and with 1236 * remove sk_buff from list. _Must_ be called atomically, and with
1237 * the list known. 1237 * the list known.
1238 */ 1238 */
1239 void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list); 1239 void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list);
1240 static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) 1240 static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
1241 { 1241 {
1242 struct sk_buff *next, *prev; 1242 struct sk_buff *next, *prev;
1243 1243
1244 list->qlen--; 1244 list->qlen--;
1245 next = skb->next; 1245 next = skb->next;
1246 prev = skb->prev; 1246 prev = skb->prev;
1247 skb->next = skb->prev = NULL; 1247 skb->next = skb->prev = NULL;
1248 next->prev = prev; 1248 next->prev = prev;
1249 prev->next = next; 1249 prev->next = next;
1250 } 1250 }
1251 1251
1252 /** 1252 /**
1253 * __skb_dequeue - remove from the head of the queue 1253 * __skb_dequeue - remove from the head of the queue
1254 * @list: list to dequeue from 1254 * @list: list to dequeue from
1255 * 1255 *
1256 * Remove the head of the list. This function does not take any locks 1256 * Remove the head of the list. This function does not take any locks
1257 * so must be used with appropriate locks held only. The head item is 1257 * so must be used with appropriate locks held only. The head item is
1258 * returned or %NULL if the list is empty. 1258 * returned or %NULL if the list is empty.
1259 */ 1259 */
1260 struct sk_buff *skb_dequeue(struct sk_buff_head *list); 1260 struct sk_buff *skb_dequeue(struct sk_buff_head *list);
1261 static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) 1261 static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
1262 { 1262 {
1263 struct sk_buff *skb = skb_peek(list); 1263 struct sk_buff *skb = skb_peek(list);
1264 if (skb) 1264 if (skb)
1265 __skb_unlink(skb, list); 1265 __skb_unlink(skb, list);
1266 return skb; 1266 return skb;
1267 } 1267 }
1268 1268
1269 /** 1269 /**
1270 * __skb_dequeue_tail - remove from the tail of the queue 1270 * __skb_dequeue_tail - remove from the tail of the queue
1271 * @list: list to dequeue from 1271 * @list: list to dequeue from
1272 * 1272 *
1273 * Remove the tail of the list. This function does not take any locks 1273 * Remove the tail of the list. This function does not take any locks
1274 * so must be used with appropriate locks held only. The tail item is 1274 * so must be used with appropriate locks held only. The tail item is
1275 * returned or %NULL if the list is empty. 1275 * returned or %NULL if the list is empty.
1276 */ 1276 */
1277 struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); 1277 struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list);
1278 static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) 1278 static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
1279 { 1279 {
1280 struct sk_buff *skb = skb_peek_tail(list); 1280 struct sk_buff *skb = skb_peek_tail(list);
1281 if (skb) 1281 if (skb)
1282 __skb_unlink(skb, list); 1282 __skb_unlink(skb, list);
1283 return skb; 1283 return skb;
1284 } 1284 }
1285 1285
1286 1286
1287 static inline bool skb_is_nonlinear(const struct sk_buff *skb) 1287 static inline bool skb_is_nonlinear(const struct sk_buff *skb)
1288 { 1288 {
1289 return skb->data_len; 1289 return skb->data_len;
1290 } 1290 }
1291 1291
1292 static inline unsigned int skb_headlen(const struct sk_buff *skb) 1292 static inline unsigned int skb_headlen(const struct sk_buff *skb)
1293 { 1293 {
1294 return skb->len - skb->data_len; 1294 return skb->len - skb->data_len;
1295 } 1295 }
1296 1296
1297 static inline int skb_pagelen(const struct sk_buff *skb) 1297 static inline int skb_pagelen(const struct sk_buff *skb)
1298 { 1298 {
1299 int i, len = 0; 1299 int i, len = 0;
1300 1300
1301 for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) 1301 for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--)
1302 len += skb_frag_size(&skb_shinfo(skb)->frags[i]); 1302 len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
1303 return len + skb_headlen(skb); 1303 return len + skb_headlen(skb);
1304 } 1304 }
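To make the linear/paged split concrete (sketch only; skb_linearize() is provided further down in this header), a path that cannot deal with paged data might do:

#include <linux/skbuff.h>

static int example_flatten(struct sk_buff *skb)
{
	/* Invariant: skb->len == skb_headlen(skb) + skb->data_len. */
	if (!skb_is_nonlinear(skb))
		return 0;		/* everything already in the linear area */

	/* Pull all paged fragments into the linear buffer. */
	return skb_linearize(skb);
}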
1305 1305
1306 /** 1306 /**
1307 * __skb_fill_page_desc - initialise a paged fragment in an skb 1307 * __skb_fill_page_desc - initialise a paged fragment in an skb
1308 * @skb: buffer containing fragment to be initialised 1308 * @skb: buffer containing fragment to be initialised
1309 * @i: paged fragment index to initialise 1309 * @i: paged fragment index to initialise
1310 * @page: the page to use for this fragment 1310 * @page: the page to use for this fragment
1311 * @off: the offset to the data within @page 1311 * @off: the offset to the data within @page
1312 * @size: the length of the data 1312 * @size: the length of the data
1313 * 1313 *
1314 * Initialises the @i'th fragment of @skb to point to @size bytes at 1314 * Initialises the @i'th fragment of @skb to point to @size bytes at
1315 * offset @off within @page. 1315 * offset @off within @page.
1316 * 1316 *
1317 * Does not take any additional reference on the fragment. 1317 * Does not take any additional reference on the fragment.
1318 */ 1318 */
1319 static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, 1319 static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
1320 struct page *page, int off, int size) 1320 struct page *page, int off, int size)
1321 { 1321 {
1322 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1322 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1323 1323
1324 /* 1324 /*
1325 * Propagate page->pfmemalloc to the skb if we can. The problem is 1325 * Propagate page->pfmemalloc to the skb if we can. The problem is
1326 * that not all callers have unique ownership of the page. If 1326 * that not all callers have unique ownership of the page. If
1327 * pfmemalloc is set, we check the mapping as a mapping implies 1327 * pfmemalloc is set, we check the mapping as a mapping implies
1328 * page->index is set (index and pfmemalloc share space). 1328 * page->index is set (index and pfmemalloc share space).
1329 * If it's a valid mapping, we cannot use page->pfmemalloc but we 1329 * If it's a valid mapping, we cannot use page->pfmemalloc but we
1330 * do not lose pfmemalloc information as the pages would not be 1330 * do not lose pfmemalloc information as the pages would not be
1331 * allocated using __GFP_MEMALLOC. 1331 * allocated using __GFP_MEMALLOC.
1332 */ 1332 */
1333 frag->page.p = page; 1333 frag->page.p = page;
1334 frag->page_offset = off; 1334 frag->page_offset = off;
1335 skb_frag_size_set(frag, size); 1335 skb_frag_size_set(frag, size);
1336 1336
1337 page = compound_head(page); 1337 page = compound_head(page);
1338 if (page->pfmemalloc && !page->mapping) 1338 if (page->pfmemalloc && !page->mapping)
1339 skb->pfmemalloc = true; 1339 skb->pfmemalloc = true;
1340 } 1340 }
1341 1341
1342 /** 1342 /**
1343 * skb_fill_page_desc - initialise a paged fragment in an skb 1343 * skb_fill_page_desc - initialise a paged fragment in an skb
1344 * @skb: buffer containing fragment to be initialised 1344 * @skb: buffer containing fragment to be initialised
1345 * @i: paged fragment index to initialise 1345 * @i: paged fragment index to initialise
1346 * @page: the page to use for this fragment 1346 * @page: the page to use for this fragment
1347 * @off: the offset to the data within @page 1347 * @off: the offset to the data within @page
1348 * @size: the length of the data 1348 * @size: the length of the data
1349 * 1349 *
1350 * As per __skb_fill_page_desc() -- initialises the @i'th fragment of 1350 * As per __skb_fill_page_desc() -- initialises the @i'th fragment of
1351 * @skb to point to @size bytes at offset @off within @page. In 1351 * @skb to point to @size bytes at offset @off within @page. In
1352 * addition updates @skb such that @i is the last fragment. 1352 * addition updates @skb such that @i is the last fragment.
1353 * 1353 *
1354 * Does not take any additional reference on the fragment. 1354 * Does not take any additional reference on the fragment.
1355 */ 1355 */
1356 static inline void skb_fill_page_desc(struct sk_buff *skb, int i, 1356 static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
1357 struct page *page, int off, int size) 1357 struct page *page, int off, int size)
1358 { 1358 {
1359 __skb_fill_page_desc(skb, i, page, off, size); 1359 __skb_fill_page_desc(skb, i, page, off, size);
1360 skb_shinfo(skb)->nr_frags = i + 1; 1360 skb_shinfo(skb)->nr_frags = i + 1;
1361 } 1361 }
1362 1362
1363 void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, 1363 void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
1364 int size, unsigned int truesize); 1364 int size, unsigned int truesize);
1365 1365
1366 void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, 1366 void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
1367 unsigned int truesize); 1367 unsigned int truesize);
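For example (illustrative only), a receive path that places data straight into pages can attach them with skb_add_rx_frag(), which fills the fragment descriptor and also accounts skb->len, skb->data_len and skb->truesize; with the bare skb_fill_page_desc() the caller has to update those fields itself:

#include <linux/skbuff.h>

static void example_attach_page(struct sk_buff *skb, struct page *page,
				unsigned int len)
{
	int i = skb_shinfo(skb)->nr_frags;

	/* Append the page as fragment i; a truesize of PAGE_SIZE is assumed. */
	skb_add_rx_frag(skb, i, page, 0, len, PAGE_SIZE);
}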
1368 1368
1369 #define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) 1369 #define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags)
1370 #define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frag_list(skb)) 1370 #define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frag_list(skb))
1371 #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) 1371 #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb))
1372 1372
1373 #ifdef NET_SKBUFF_DATA_USES_OFFSET 1373 #ifdef NET_SKBUFF_DATA_USES_OFFSET
1374 static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb) 1374 static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
1375 { 1375 {
1376 return skb->head + skb->tail; 1376 return skb->head + skb->tail;
1377 } 1377 }
1378 1378
1379 static inline void skb_reset_tail_pointer(struct sk_buff *skb) 1379 static inline void skb_reset_tail_pointer(struct sk_buff *skb)
1380 { 1380 {
1381 skb->tail = skb->data - skb->head; 1381 skb->tail = skb->data - skb->head;
1382 } 1382 }
1383 1383
1384 static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) 1384 static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
1385 { 1385 {
1386 skb_reset_tail_pointer(skb); 1386 skb_reset_tail_pointer(skb);
1387 skb->tail += offset; 1387 skb->tail += offset;
1388 } 1388 }
1389 1389
1390 #else /* NET_SKBUFF_DATA_USES_OFFSET */ 1390 #else /* NET_SKBUFF_DATA_USES_OFFSET */
1391 static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb) 1391 static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
1392 { 1392 {
1393 return skb->tail; 1393 return skb->tail;
1394 } 1394 }
1395 1395
1396 static inline void skb_reset_tail_pointer(struct sk_buff *skb) 1396 static inline void skb_reset_tail_pointer(struct sk_buff *skb)
1397 { 1397 {
1398 skb->tail = skb->data; 1398 skb->tail = skb->data;
1399 } 1399 }
1400 1400
1401 static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) 1401 static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
1402 { 1402 {
1403 skb->tail = skb->data + offset; 1403 skb->tail = skb->data + offset;
1404 } 1404 }
1405 1405
1406 #endif /* NET_SKBUFF_DATA_USES_OFFSET */ 1406 #endif /* NET_SKBUFF_DATA_USES_OFFSET */
1407 1407
1408 /* 1408 /*
1409 * Add data to an sk_buff 1409 * Add data to an sk_buff
1410 */ 1410 */
1411 unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len); 1411 unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len);
1412 unsigned char *skb_put(struct sk_buff *skb, unsigned int len); 1412 unsigned char *skb_put(struct sk_buff *skb, unsigned int len);
1413 static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) 1413 static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
1414 { 1414 {
1415 unsigned char *tmp = skb_tail_pointer(skb); 1415 unsigned char *tmp = skb_tail_pointer(skb);
1416 SKB_LINEAR_ASSERT(skb); 1416 SKB_LINEAR_ASSERT(skb);
1417 skb->tail += len; 1417 skb->tail += len;
1418 skb->len += len; 1418 skb->len += len;
1419 return tmp; 1419 return tmp;
1420 } 1420 }
1421 1421
1422 unsigned char *skb_push(struct sk_buff *skb, unsigned int len); 1422 unsigned char *skb_push(struct sk_buff *skb, unsigned int len);
1423 static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) 1423 static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
1424 { 1424 {
1425 skb->data -= len; 1425 skb->data -= len;
1426 skb->len += len; 1426 skb->len += len;
1427 return skb->data; 1427 return skb->data;
1428 } 1428 }
1429 1429
1430 unsigned char *skb_pull(struct sk_buff *skb, unsigned int len); 1430 unsigned char *skb_pull(struct sk_buff *skb, unsigned int len);
1431 static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len) 1431 static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len)
1432 { 1432 {
1433 skb->len -= len; 1433 skb->len -= len;
1434 BUG_ON(skb->len < skb->data_len); 1434 BUG_ON(skb->len < skb->data_len);
1435 return skb->data += len; 1435 return skb->data += len;
1436 } 1436 }
1437 1437
1438 static inline unsigned char *skb_pull_inline(struct sk_buff *skb, unsigned int len) 1438 static inline unsigned char *skb_pull_inline(struct sk_buff *skb, unsigned int len)
1439 { 1439 {
1440 return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); 1440 return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
1441 } 1441 }
1442 1442
1443 unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta); 1443 unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta);
1444 1444
1445 static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len) 1445 static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len)
1446 { 1446 {
1447 if (len > skb_headlen(skb) && 1447 if (len > skb_headlen(skb) &&
1448 !__pskb_pull_tail(skb, len - skb_headlen(skb))) 1448 !__pskb_pull_tail(skb, len - skb_headlen(skb)))
1449 return NULL; 1449 return NULL;
1450 skb->len -= len; 1450 skb->len -= len;
1451 return skb->data += len; 1451 return skb->data += len;
1452 } 1452 }
1453 1453
1454 static inline unsigned char *pskb_pull(struct sk_buff *skb, unsigned int len) 1454 static inline unsigned char *pskb_pull(struct sk_buff *skb, unsigned int len)
1455 { 1455 {
1456 return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len); 1456 return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len);
1457 } 1457 }
1458 1458
1459 static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) 1459 static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
1460 { 1460 {
1461 if (likely(len <= skb_headlen(skb))) 1461 if (likely(len <= skb_headlen(skb)))
1462 return 1; 1462 return 1;
1463 if (unlikely(len > skb->len)) 1463 if (unlikely(len > skb->len))
1464 return 0; 1464 return 0;
1465 return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL; 1465 return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL;
1466 } 1466 }
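A typical parsing sequence (sketch only) checks with pskb_may_pull() before dereferencing skb->data; note that a successful pull may reallocate the head, so pointers into the packet must be re-read afterwards:

#include <linux/ip.h>
#include <linux/skbuff.h>

static int example_check_iph(struct sk_buff *skb)
{
	const struct iphdr *iph;

	if (!pskb_may_pull(skb, sizeof(*iph)))
		return -EINVAL;			/* header not in linear data */

	iph = (const struct iphdr *)skb->data;
	if (!pskb_may_pull(skb, iph->ihl * 4))
		return -EINVAL;			/* IP options missing */

	/* Reload: __pskb_pull_tail() may have moved skb->head. */
	iph = (const struct iphdr *)skb->data;

	return iph->version == 4 ? 0 : -EINVAL;
}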
1467 1467
1468 /** 1468 /**
1469 * skb_headroom - bytes at buffer head 1469 * skb_headroom - bytes at buffer head
1470 * @skb: buffer to check 1470 * @skb: buffer to check
1471 * 1471 *
1472 * Return the number of bytes of free space at the head of an &sk_buff. 1472 * Return the number of bytes of free space at the head of an &sk_buff.
1473 */ 1473 */
1474 static inline unsigned int skb_headroom(const struct sk_buff *skb) 1474 static inline unsigned int skb_headroom(const struct sk_buff *skb)
1475 { 1475 {
1476 return skb->data - skb->head; 1476 return skb->data - skb->head;
1477 } 1477 }
1478 1478
1479 /** 1479 /**
1480 * skb_tailroom - bytes at buffer end 1480 * skb_tailroom - bytes at buffer end
1481 * @skb: buffer to check 1481 * @skb: buffer to check
1482 * 1482 *
1483 * Return the number of bytes of free space at the tail of an sk_buff 1483 * Return the number of bytes of free space at the tail of an sk_buff
1484 */ 1484 */
1485 static inline int skb_tailroom(const struct sk_buff *skb) 1485 static inline int skb_tailroom(const struct sk_buff *skb)
1486 { 1486 {
1487 return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail; 1487 return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail;
1488 } 1488 }
1489 1489
1490 /** 1490 /**
1491 * skb_availroom - bytes at buffer end 1491 * skb_availroom - bytes at buffer end
1492 * @skb: buffer to check 1492 * @skb: buffer to check
1493 * 1493 *
1494 * Return the number of bytes of free space at the tail of an sk_buff 1494 * Return the number of bytes of free space at the tail of an sk_buff
1495 * allocated by sk_stream_alloc() 1495 * allocated by sk_stream_alloc()
1496 */ 1496 */
1497 static inline int skb_availroom(const struct sk_buff *skb) 1497 static inline int skb_availroom(const struct sk_buff *skb)
1498 { 1498 {
1499 if (skb_is_nonlinear(skb)) 1499 if (skb_is_nonlinear(skb))
1500 return 0; 1500 return 0;
1501 1501
1502 return skb->end - skb->tail - skb->reserved_tailroom; 1502 return skb->end - skb->tail - skb->reserved_tailroom;
1503 } 1503 }
1504 1504
1505 /** 1505 /**
1506 * skb_reserve - adjust headroom 1506 * skb_reserve - adjust headroom
1507 * @skb: buffer to alter 1507 * @skb: buffer to alter
1508 * @len: bytes to move 1508 * @len: bytes to move
1509 * 1509 *
1510 * Increase the headroom of an empty &sk_buff by reducing the tail 1510 * Increase the headroom of an empty &sk_buff by reducing the tail
1511 * room. This is only allowed for an empty buffer. 1511 * room. This is only allowed for an empty buffer.
1512 */ 1512 */
1513 static inline void skb_reserve(struct sk_buff *skb, int len) 1513 static inline void skb_reserve(struct sk_buff *skb, int len)
1514 { 1514 {
1515 skb->data += len; 1515 skb->data += len;
1516 skb->tail += len; 1516 skb->tail += len;
1517 } 1517 }
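Putting skb_reserve() together with skb_put() and skb_push() from above, a minimal transmit-side sketch (struct example_hdr and all names are made up; host byte order is used for brevity):

#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/types.h>

struct example_hdr {			/* hypothetical protocol header */
	u16 type;
	u16 len;
};

static struct sk_buff *example_build(const void *payload, unsigned int plen)
{
	struct example_hdr *hdr;
	struct sk_buff *skb;

	skb = alloc_skb(sizeof(*hdr) + plen, GFP_ATOMIC);
	if (!skb)
		return NULL;

	skb_reserve(skb, sizeof(*hdr));		/* headroom for the header */
	memcpy(skb_put(skb, plen), payload, plen);

	hdr = (struct example_hdr *)skb_push(skb, sizeof(*hdr));
	hdr->type = 0;
	hdr->len = (u16)plen;

	return skb;
}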
1518 1518
1519 static inline void skb_reset_inner_headers(struct sk_buff *skb) 1519 static inline void skb_reset_inner_headers(struct sk_buff *skb)
1520 { 1520 {
1521 skb->inner_mac_header = skb->mac_header; 1521 skb->inner_mac_header = skb->mac_header;
1522 skb->inner_network_header = skb->network_header; 1522 skb->inner_network_header = skb->network_header;
1523 skb->inner_transport_header = skb->transport_header; 1523 skb->inner_transport_header = skb->transport_header;
1524 } 1524 }
1525 1525
1526 static inline void skb_reset_mac_len(struct sk_buff *skb) 1526 static inline void skb_reset_mac_len(struct sk_buff *skb)
1527 { 1527 {
1528 skb->mac_len = skb->network_header - skb->mac_header; 1528 skb->mac_len = skb->network_header - skb->mac_header;
1529 } 1529 }
1530 1530
1531 static inline unsigned char *skb_inner_transport_header(const struct sk_buff 1531 static inline unsigned char *skb_inner_transport_header(const struct sk_buff
1532 *skb) 1532 *skb)
1533 { 1533 {
1534 return skb->head + skb->inner_transport_header; 1534 return skb->head + skb->inner_transport_header;
1535 } 1535 }
1536 1536
1537 static inline void skb_reset_inner_transport_header(struct sk_buff *skb) 1537 static inline void skb_reset_inner_transport_header(struct sk_buff *skb)
1538 { 1538 {
1539 skb->inner_transport_header = skb->data - skb->head; 1539 skb->inner_transport_header = skb->data - skb->head;
1540 } 1540 }
1541 1541
1542 static inline void skb_set_inner_transport_header(struct sk_buff *skb, 1542 static inline void skb_set_inner_transport_header(struct sk_buff *skb,
1543 const int offset) 1543 const int offset)
1544 { 1544 {
1545 skb_reset_inner_transport_header(skb); 1545 skb_reset_inner_transport_header(skb);
1546 skb->inner_transport_header += offset; 1546 skb->inner_transport_header += offset;
1547 } 1547 }
1548 1548
1549 static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb) 1549 static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb)
1550 { 1550 {
1551 return skb->head + skb->inner_network_header; 1551 return skb->head + skb->inner_network_header;
1552 } 1552 }
1553 1553
1554 static inline void skb_reset_inner_network_header(struct sk_buff *skb) 1554 static inline void skb_reset_inner_network_header(struct sk_buff *skb)
1555 { 1555 {
1556 skb->inner_network_header = skb->data - skb->head; 1556 skb->inner_network_header = skb->data - skb->head;
1557 } 1557 }
1558 1558
1559 static inline void skb_set_inner_network_header(struct sk_buff *skb, 1559 static inline void skb_set_inner_network_header(struct sk_buff *skb,
1560 const int offset) 1560 const int offset)
1561 { 1561 {
1562 skb_reset_inner_network_header(skb); 1562 skb_reset_inner_network_header(skb);
1563 skb->inner_network_header += offset; 1563 skb->inner_network_header += offset;
1564 } 1564 }
1565 1565
1566 static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) 1566 static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb)
1567 { 1567 {
1568 return skb->head + skb->inner_mac_header; 1568 return skb->head + skb->inner_mac_header;
1569 } 1569 }
1570 1570
1571 static inline void skb_reset_inner_mac_header(struct sk_buff *skb) 1571 static inline void skb_reset_inner_mac_header(struct sk_buff *skb)
1572 { 1572 {
1573 skb->inner_mac_header = skb->data - skb->head; 1573 skb->inner_mac_header = skb->data - skb->head;
1574 } 1574 }
1575 1575
1576 static inline void skb_set_inner_mac_header(struct sk_buff *skb, 1576 static inline void skb_set_inner_mac_header(struct sk_buff *skb,
1577 const int offset) 1577 const int offset)
1578 { 1578 {
1579 skb_reset_inner_mac_header(skb); 1579 skb_reset_inner_mac_header(skb);
1580 skb->inner_mac_header += offset; 1580 skb->inner_mac_header += offset;
1581 } 1581 }
1582 static inline bool skb_transport_header_was_set(const struct sk_buff *skb) 1582 static inline bool skb_transport_header_was_set(const struct sk_buff *skb)
1583 { 1583 {
1584 return skb->transport_header != (typeof(skb->transport_header))~0U; 1584 return skb->transport_header != (typeof(skb->transport_header))~0U;
1585 } 1585 }
1586 1586
1587 static inline unsigned char *skb_transport_header(const struct sk_buff *skb) 1587 static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
1588 { 1588 {
1589 return skb->head + skb->transport_header; 1589 return skb->head + skb->transport_header;
1590 } 1590 }
1591 1591
1592 static inline void skb_reset_transport_header(struct sk_buff *skb) 1592 static inline void skb_reset_transport_header(struct sk_buff *skb)
1593 { 1593 {
1594 skb->transport_header = skb->data - skb->head; 1594 skb->transport_header = skb->data - skb->head;
1595 } 1595 }
1596 1596
1597 static inline void skb_set_transport_header(struct sk_buff *skb, 1597 static inline void skb_set_transport_header(struct sk_buff *skb,
1598 const int offset) 1598 const int offset)
1599 { 1599 {
1600 skb_reset_transport_header(skb); 1600 skb_reset_transport_header(skb);
1601 skb->transport_header += offset; 1601 skb->transport_header += offset;
1602 } 1602 }
1603 1603
1604 static inline unsigned char *skb_network_header(const struct sk_buff *skb) 1604 static inline unsigned char *skb_network_header(const struct sk_buff *skb)
1605 { 1605 {
1606 return skb->head + skb->network_header; 1606 return skb->head + skb->network_header;
1607 } 1607 }
1608 1608
1609 static inline void skb_reset_network_header(struct sk_buff *skb) 1609 static inline void skb_reset_network_header(struct sk_buff *skb)
1610 { 1610 {
1611 skb->network_header = skb->data - skb->head; 1611 skb->network_header = skb->data - skb->head;
1612 } 1612 }
1613 1613
1614 static inline void skb_set_network_header(struct sk_buff *skb, const int offset) 1614 static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
1615 { 1615 {
1616 skb_reset_network_header(skb); 1616 skb_reset_network_header(skb);
1617 skb->network_header += offset; 1617 skb->network_header += offset;
1618 } 1618 }
1619 1619
1620 static inline unsigned char *skb_mac_header(const struct sk_buff *skb) 1620 static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
1621 { 1621 {
1622 return skb->head + skb->mac_header; 1622 return skb->head + skb->mac_header;
1623 } 1623 }
1624 1624
1625 static inline int skb_mac_header_was_set(const struct sk_buff *skb) 1625 static inline int skb_mac_header_was_set(const struct sk_buff *skb)
1626 { 1626 {
1627 return skb->mac_header != (typeof(skb->mac_header))~0U; 1627 return skb->mac_header != (typeof(skb->mac_header))~0U;
1628 } 1628 }
1629 1629
1630 static inline void skb_reset_mac_header(struct sk_buff *skb) 1630 static inline void skb_reset_mac_header(struct sk_buff *skb)
1631 { 1631 {
1632 skb->mac_header = skb->data - skb->head; 1632 skb->mac_header = skb->data - skb->head;
1633 } 1633 }
1634 1634
1635 static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) 1635 static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
1636 { 1636 {
1637 skb_reset_mac_header(skb); 1637 skb_reset_mac_header(skb);
1638 skb->mac_header += offset; 1638 skb->mac_header += offset;
1639 } 1639 }
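As a sketch of how the reset/set helpers above are used while building or decapsulating a packet (offsets assume a plain Ethernet + IPv4 frame; illustrative only):

#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/skbuff.h>

static void example_mark_headers(struct sk_buff *skb)
{
	/* skb->data is assumed to point at the Ethernet header here. */
	skb_reset_mac_header(skb);
	skb_set_network_header(skb, ETH_HLEN);
	skb_set_transport_header(skb, ETH_HLEN + sizeof(struct iphdr));
	skb_reset_mac_len(skb);		/* mac_len = network - mac = ETH_HLEN */
}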
1640 1640
1641 static inline void skb_probe_transport_header(struct sk_buff *skb, 1641 static inline void skb_probe_transport_header(struct sk_buff *skb,
1642 const int offset_hint) 1642 const int offset_hint)
1643 { 1643 {
1644 struct flow_keys keys; 1644 struct flow_keys keys;
1645 1645
1646 if (skb_transport_header_was_set(skb)) 1646 if (skb_transport_header_was_set(skb))
1647 return; 1647 return;
1648 else if (skb_flow_dissect(skb, &keys)) 1648 else if (skb_flow_dissect(skb, &keys))
1649 skb_set_transport_header(skb, keys.thoff); 1649 skb_set_transport_header(skb, keys.thoff);
1650 else 1650 else
1651 skb_set_transport_header(skb, offset_hint); 1651 skb_set_transport_header(skb, offset_hint);
1652 } 1652 }
1653 1653
1654 static inline void skb_mac_header_rebuild(struct sk_buff *skb) 1654 static inline void skb_mac_header_rebuild(struct sk_buff *skb)
1655 { 1655 {
1656 if (skb_mac_header_was_set(skb)) { 1656 if (skb_mac_header_was_set(skb)) {
1657 const unsigned char *old_mac = skb_mac_header(skb); 1657 const unsigned char *old_mac = skb_mac_header(skb);
1658 1658
1659 skb_set_mac_header(skb, -skb->mac_len); 1659 skb_set_mac_header(skb, -skb->mac_len);
1660 memmove(skb_mac_header(skb), old_mac, skb->mac_len); 1660 memmove(skb_mac_header(skb), old_mac, skb->mac_len);
1661 } 1661 }
1662 } 1662 }
1663 1663
1664 static inline int skb_checksum_start_offset(const struct sk_buff *skb) 1664 static inline int skb_checksum_start_offset(const struct sk_buff *skb)
1665 { 1665 {
1666 return skb->csum_start - skb_headroom(skb); 1666 return skb->csum_start - skb_headroom(skb);
1667 } 1667 }
1668 1668
1669 static inline int skb_transport_offset(const struct sk_buff *skb) 1669 static inline int skb_transport_offset(const struct sk_buff *skb)
1670 { 1670 {
1671 return skb_transport_header(skb) - skb->data; 1671 return skb_transport_header(skb) - skb->data;
1672 } 1672 }
1673 1673
1674 static inline u32 skb_network_header_len(const struct sk_buff *skb) 1674 static inline u32 skb_network_header_len(const struct sk_buff *skb)
1675 { 1675 {
1676 return skb->transport_header - skb->network_header; 1676 return skb->transport_header - skb->network_header;
1677 } 1677 }
1678 1678
1679 static inline u32 skb_inner_network_header_len(const struct sk_buff *skb) 1679 static inline u32 skb_inner_network_header_len(const struct sk_buff *skb)
1680 { 1680 {
1681 return skb->inner_transport_header - skb->inner_network_header; 1681 return skb->inner_transport_header - skb->inner_network_header;
1682 } 1682 }
1683 1683
1684 static inline int skb_network_offset(const struct sk_buff *skb) 1684 static inline int skb_network_offset(const struct sk_buff *skb)
1685 { 1685 {
1686 return skb_network_header(skb) - skb->data; 1686 return skb_network_header(skb) - skb->data;
1687 } 1687 }
1688 1688
1689 static inline int skb_inner_network_offset(const struct sk_buff *skb) 1689 static inline int skb_inner_network_offset(const struct sk_buff *skb)
1690 { 1690 {
1691 return skb_inner_network_header(skb) - skb->data; 1691 return skb_inner_network_header(skb) - skb->data;
1692 } 1692 }
1693 1693
1694 static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) 1694 static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
1695 { 1695 {
1696 return pskb_may_pull(skb, skb_network_offset(skb) + len); 1696 return pskb_may_pull(skb, skb_network_offset(skb) + len);
1697 } 1697 }
1698 1698
1699 /* 1699 /*
1700 * CPUs often take a performance hit when accessing unaligned memory 1700 * CPUs often take a performance hit when accessing unaligned memory
1701 * locations. The actual performance hit varies, it can be small if the 1701 * locations. The actual performance hit varies, it can be small if the
1702 * hardware handles it or large if we have to take an exception and fix it 1702 * hardware handles it or large if we have to take an exception and fix it
1703 * in software. 1703 * in software.
1704 * 1704 *
1705 * Since an ethernet header is 14 bytes network drivers often end up with 1705 * Since an ethernet header is 14 bytes network drivers often end up with
1706 * the IP header at an unaligned offset. The IP header can be aligned by 1706 * the IP header at an unaligned offset. The IP header can be aligned by
1707 * shifting the start of the packet by 2 bytes. Drivers should do this 1707 * shifting the start of the packet by 2 bytes. Drivers should do this
1708 * with: 1708 * with:
1709 * 1709 *
1710 * skb_reserve(skb, NET_IP_ALIGN); 1710 * skb_reserve(skb, NET_IP_ALIGN);
1711 * 1711 *
1712 * The downside to this alignment of the IP header is that the DMA is now 1712 * The downside to this alignment of the IP header is that the DMA is now
1713 * unaligned. On some architectures the cost of an unaligned DMA is high 1713 * unaligned. On some architectures the cost of an unaligned DMA is high
1714 * and this cost outweighs the gains made by aligning the IP header. 1714 * and this cost outweighs the gains made by aligning the IP header.
1715 * 1715 *
1716 * Since this trade off varies between architectures, we allow NET_IP_ALIGN 1716 * Since this trade off varies between architectures, we allow NET_IP_ALIGN
1717 * to be overridden. 1717 * to be overridden.
1718 */ 1718 */
1719 #ifndef NET_IP_ALIGN 1719 #ifndef NET_IP_ALIGN
1720 #define NET_IP_ALIGN 2 1720 #define NET_IP_ALIGN 2
1721 #endif 1721 #endif
1722 1722
1723 /* 1723 /*
1724 * The networking layer reserves some headroom in skb data (via 1724 * The networking layer reserves some headroom in skb data (via
1725 * dev_alloc_skb). This is used to avoid having to reallocate skb data when 1725 * dev_alloc_skb). This is used to avoid having to reallocate skb data when
1726 * the header has to grow. In the default case, if the header has to grow 1726 * the header has to grow. In the default case, if the header has to grow
1727 * 32 bytes or less we avoid the reallocation. 1727 * 32 bytes or less we avoid the reallocation.
1728 * 1728 *
1729 * Unfortunately this headroom changes the DMA alignment of the resulting 1729 * Unfortunately this headroom changes the DMA alignment of the resulting
1730 * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive 1730 * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive
1731 * on some architectures. An architecture can override this value, 1731 * on some architectures. An architecture can override this value,
1732 * perhaps setting it to a cacheline in size (since that will maintain 1732 * perhaps setting it to a cacheline in size (since that will maintain
1733 * cacheline alignment of the DMA). It must be a power of 2. 1733 * cacheline alignment of the DMA). It must be a power of 2.
1734 * 1734 *
1735 * Various parts of the networking layer expect at least 32 bytes of 1735 * Various parts of the networking layer expect at least 32 bytes of
1736 * headroom; you should not reduce this. 1736 * headroom; you should not reduce this.
1737 * 1737 *
1738 * Using max(32, L1_CACHE_BYTES) makes sense (especially with RPS) 1738 * Using max(32, L1_CACHE_BYTES) makes sense (especially with RPS)
1739 * to reduce average number of cache lines per packet. 1739 * to reduce average number of cache lines per packet.
1740 * get_rps_cpus() for example only accesses one 64-byte aligned block: 1740 * get_rps_cpus() for example only accesses one 64-byte aligned block:
1741 * NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8) 1741 * NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8)
1742 */ 1742 */
1743 #ifndef NET_SKB_PAD 1743 #ifndef NET_SKB_PAD
1744 #define NET_SKB_PAD max(32, L1_CACHE_BYTES) 1744 #define NET_SKB_PAD max(32, L1_CACHE_BYTES)
1745 #endif 1745 #endif
1746 1746
1747 int ___pskb_trim(struct sk_buff *skb, unsigned int len); 1747 int ___pskb_trim(struct sk_buff *skb, unsigned int len);
1748 1748
1749 static inline void __skb_trim(struct sk_buff *skb, unsigned int len) 1749 static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
1750 { 1750 {
1751 if (unlikely(skb_is_nonlinear(skb))) { 1751 if (unlikely(skb_is_nonlinear(skb))) {
1752 WARN_ON(1); 1752 WARN_ON(1);
1753 return; 1753 return;
1754 } 1754 }
1755 skb->len = len; 1755 skb->len = len;
1756 skb_set_tail_pointer(skb, len); 1756 skb_set_tail_pointer(skb, len);
1757 } 1757 }
1758 1758
1759 void skb_trim(struct sk_buff *skb, unsigned int len); 1759 void skb_trim(struct sk_buff *skb, unsigned int len);
1760 1760
1761 static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) 1761 static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
1762 { 1762 {
1763 if (skb->data_len) 1763 if (skb->data_len)
1764 return ___pskb_trim(skb, len); 1764 return ___pskb_trim(skb, len);
1765 __skb_trim(skb, len); 1765 __skb_trim(skb, len);
1766 return 0; 1766 return 0;
1767 } 1767 }
1768 1768
1769 static inline int pskb_trim(struct sk_buff *skb, unsigned int len) 1769 static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
1770 { 1770 {
1771 return (len < skb->len) ? __pskb_trim(skb, len) : 0; 1771 return (len < skb->len) ? __pskb_trim(skb, len) : 0;
1772 } 1772 }
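A typical use of pskb_trim(), sketched with the assumption that the network header has already been set, is dropping link-layer padding beyond the length the IPv4 header advertises:

#include <linux/ip.h>
#include <linux/skbuff.h>

static int example_trim_padding(struct sk_buff *skb)
{
	unsigned int len = ntohs(ip_hdr(skb)->tot_len);

	if (skb->len > len)
		return pskb_trim(skb, len);	/* drop trailing padding */
	return 0;
}

When hardware may have computed a CHECKSUM_COMPLETE value, the pskb_trim_rcsum() variant in this header is the one to use so the stored checksum is not left stale.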
1773 1773
1774 /** 1774 /**
1775 * pskb_trim_unique - remove end from a paged unique (not cloned) buffer 1775 * pskb_trim_unique - remove end from a paged unique (not cloned) buffer
1776 * @skb: buffer to alter 1776 * @skb: buffer to alter
1777 * @len: new length 1777 * @len: new length
1778 * 1778 *
1779 * This is identical to pskb_trim except that the caller knows that 1779 * This is identical to pskb_trim except that the caller knows that
1780 * the skb is not cloned so we should never get an error due to out- 1780 * the skb is not cloned so we should never get an error due to out-
1781 * of-memory. 1781 * of-memory.
1782 */ 1782 */
1783 static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len) 1783 static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len)
1784 { 1784 {
1785 int err = pskb_trim(skb, len); 1785 int err = pskb_trim(skb, len);
1786 BUG_ON(err); 1786 BUG_ON(err);
1787 } 1787 }
1788 1788
1789 /** 1789 /**
1790 * skb_orphan - orphan a buffer 1790 * skb_orphan - orphan a buffer
1791 * @skb: buffer to orphan 1791 * @skb: buffer to orphan
1792 * 1792 *
1793 * If a buffer currently has an owner then we call the owner's 1793 * If a buffer currently has an owner then we call the owner's
1794 * destructor function and make the @skb unowned. The buffer continues 1794 * destructor function and make the @skb unowned. The buffer continues
1795 * to exist but is no longer charged to its former owner. 1795 * to exist but is no longer charged to its former owner.
1796 */ 1796 */
1797 static inline void skb_orphan(struct sk_buff *skb) 1797 static inline void skb_orphan(struct sk_buff *skb)
1798 { 1798 {
1799 if (skb->destructor) { 1799 if (skb->destructor) {
1800 skb->destructor(skb); 1800 skb->destructor(skb);
1801 skb->destructor = NULL; 1801 skb->destructor = NULL;
1802 skb->sk = NULL; 1802 skb->sk = NULL;
1803 } else { 1803 } else {
1804 BUG_ON(skb->sk); 1804 BUG_ON(skb->sk);
1805 } 1805 }
1806 } 1806 }
1807 1807
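A hedged sketch of a typical caller: code that is about to hold an skb for an unbounded time orphans it first so the sending socket's memory accounting is released. The parking list and function name are invented for the example.

/* Illustrative only: detach the skb from its owning socket, then park
 * it on a driver-private list. skb_queue_tail() takes the list lock.
 */
static void example_park_skb(struct sk_buff_head *park_list,
			     struct sk_buff *skb)
{
	skb_orphan(skb);		/* runs destructor, clears skb->sk */
	skb_queue_tail(park_list, skb);
}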
1808 /** 1808 /**
1809 * skb_orphan_frags - orphan the frags contained in a buffer 1809 * skb_orphan_frags - orphan the frags contained in a buffer
1810 * @skb: buffer to orphan frags from 1810 * @skb: buffer to orphan frags from
1811 * @gfp_mask: allocation mask for replacement pages 1811 * @gfp_mask: allocation mask for replacement pages
1812 * 1812 *
1813 * For each frag in the SKB which needs a destructor (i.e. has an 1813 * For each frag in the SKB which needs a destructor (i.e. has an
1814 * owner) create a copy of that frag and release the original 1814 * owner) create a copy of that frag and release the original
1815 * page by calling the destructor. 1815 * page by calling the destructor.
1816 */ 1816 */
1817 static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask) 1817 static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask)
1818 { 1818 {
1819 if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY))) 1819 if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY)))
1820 return 0; 1820 return 0;
1821 return skb_copy_ubufs(skb, gfp_mask); 1821 return skb_copy_ubufs(skb, gfp_mask);
1822 } 1822 }
1823 1823
1824 /** 1824 /**
1825 * __skb_queue_purge - empty a list 1825 * __skb_queue_purge - empty a list
1826 * @list: list to empty 1826 * @list: list to empty
1827 * 1827 *
1828 * Delete all buffers on an &sk_buff list. Each buffer is removed from 1828 * Delete all buffers on an &sk_buff list. Each buffer is removed from
1829 * the list and one reference dropped. This function does not take the 1829 * the list and one reference dropped. This function does not take the
1830 * list lock and the caller must hold the relevant locks to use it. 1830 * list lock and the caller must hold the relevant locks to use it.
1831 */ 1831 */
1832 void skb_queue_purge(struct sk_buff_head *list); 1832 void skb_queue_purge(struct sk_buff_head *list);
1833 static inline void __skb_queue_purge(struct sk_buff_head *list) 1833 static inline void __skb_queue_purge(struct sk_buff_head *list)
1834 { 1834 {
1835 struct sk_buff *skb; 1835 struct sk_buff *skb;
1836 while ((skb = __skb_dequeue(list)) != NULL) 1836 while ((skb = __skb_dequeue(list)) != NULL)
1837 kfree_skb(skb); 1837 kfree_skb(skb);
1838 } 1838 }
1839 1839
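For illustration, a teardown helper might choose between the two purge variants like this; the backlog list and its lock are assumptions of the example, not something this header mandates.

/* Sketch: free every skb on a private backlog. __skb_queue_purge()
 * is the lock-free variant, so the caller supplies the locking.
 */
static void example_flush_backlog(struct sk_buff_head *backlog,
				  spinlock_t *lock)
{
	spin_lock_bh(lock);
	__skb_queue_purge(backlog);
	spin_unlock_bh(lock);
}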
1840 #define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768) 1840 #define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
1841 #define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) 1841 #define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
1842 #define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE 1842 #define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE
1843 1843
1844 void *netdev_alloc_frag(unsigned int fragsz); 1844 void *netdev_alloc_frag(unsigned int fragsz);
1845 1845
1846 struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, 1846 struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length,
1847 gfp_t gfp_mask); 1847 gfp_t gfp_mask);
1848 1848
1849 /** 1849 /**
1850 * netdev_alloc_skb - allocate an skbuff for rx on a specific device 1850 * netdev_alloc_skb - allocate an skbuff for rx on a specific device
1851 * @dev: network device to receive on 1851 * @dev: network device to receive on
1852 * @length: length to allocate 1852 * @length: length to allocate
1853 * 1853 *
1854 * Allocate a new &sk_buff and assign it a usage count of one. The 1854 * Allocate a new &sk_buff and assign it a usage count of one. The
1855 * buffer has unspecified headroom built in. Users should allocate 1855 * buffer has unspecified headroom built in. Users should allocate
1856 * the headroom they think they need without accounting for the 1856 * the headroom they think they need without accounting for the
1857 * built-in space. The built-in space is used for optimisations. 1857 * built-in space. The built-in space is used for optimisations.
1858 * 1858 *
1859 * %NULL is returned if there is no free memory. Although this function 1859 * %NULL is returned if there is no free memory. Although this function
1860 * allocates memory it can be called from an interrupt. 1860 * allocates memory it can be called from an interrupt.
1861 */ 1861 */
1862 static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev, 1862 static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev,
1863 unsigned int length) 1863 unsigned int length)
1864 { 1864 {
1865 return __netdev_alloc_skb(dev, length, GFP_ATOMIC); 1865 return __netdev_alloc_skb(dev, length, GFP_ATOMIC);
1866 } 1866 }
1867 1867
1868 /* legacy helper around __netdev_alloc_skb() */ 1868 /* legacy helper around __netdev_alloc_skb() */
1869 static inline struct sk_buff *__dev_alloc_skb(unsigned int length, 1869 static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
1870 gfp_t gfp_mask) 1870 gfp_t gfp_mask)
1871 { 1871 {
1872 return __netdev_alloc_skb(NULL, length, gfp_mask); 1872 return __netdev_alloc_skb(NULL, length, gfp_mask);
1873 } 1873 }
1874 1874
1875 /* legacy helper around netdev_alloc_skb() */ 1875 /* legacy helper around netdev_alloc_skb() */
1876 static inline struct sk_buff *dev_alloc_skb(unsigned int length) 1876 static inline struct sk_buff *dev_alloc_skb(unsigned int length)
1877 { 1877 {
1878 return netdev_alloc_skb(NULL, length); 1878 return netdev_alloc_skb(NULL, length);
1879 } 1879 }
1880 1880
1881 1881
1882 static inline struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev, 1882 static inline struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev,
1883 unsigned int length, gfp_t gfp) 1883 unsigned int length, gfp_t gfp)
1884 { 1884 {
1885 struct sk_buff *skb = __netdev_alloc_skb(dev, length + NET_IP_ALIGN, gfp); 1885 struct sk_buff *skb = __netdev_alloc_skb(dev, length + NET_IP_ALIGN, gfp);
1886 1886
1887 if (NET_IP_ALIGN && skb) 1887 if (NET_IP_ALIGN && skb)
1888 skb_reserve(skb, NET_IP_ALIGN); 1888 skb_reserve(skb, NET_IP_ALIGN);
1889 return skb; 1889 return skb;
1890 } 1890 }
1891 1891
1892 static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, 1892 static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
1893 unsigned int length) 1893 unsigned int length)
1894 { 1894 {
1895 return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC); 1895 return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);
1896 } 1896 }
1897 1897
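A rough sketch of a copying RX path built on these allocation helpers; the function name and the use of eth_type_trans() (from <linux/etherdevice.h>) are example choices. Reserving NET_IP_ALIGN (2 bytes) means that after the 14-byte Ethernet header the IP header starts at offset 16, i.e. on a 4-byte boundary.

/* Hypothetical copybreak-style receive: allocate an aligned skb and
 * copy the frame in. GFP_ATOMIC is implied by the helper.
 */
static struct sk_buff *example_rx_copy(struct net_device *dev,
				       const void *frame, unsigned int len)
{
	struct sk_buff *skb = netdev_alloc_skb_ip_align(dev, len);

	if (!skb)
		return NULL;			/* out of memory */
	memcpy(skb_put(skb, len), frame, len);	/* payload into linear area */
	skb->protocol = eth_type_trans(skb, dev);
	return skb;
}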
1898 /** 1898 /**
1899 * __skb_alloc_pages - allocate pages for ps-rx on a skb and preserve pfmemalloc data 1899 * __skb_alloc_pages - allocate pages for ps-rx on a skb and preserve pfmemalloc data
1900 * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX 1900 * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX
1901 * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used 1901 * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used
1902 * @order: size of the allocation 1902 * @order: size of the allocation
1903 * 1903 *
1904 * Allocate a new page. 1904 * Allocate a new page.
1905 * 1905 *
1906 * %NULL is returned if there is no free memory. 1906 * %NULL is returned if there is no free memory.
1907 */ 1907 */
1908 static inline struct page *__skb_alloc_pages(gfp_t gfp_mask, 1908 static inline struct page *__skb_alloc_pages(gfp_t gfp_mask,
1909 struct sk_buff *skb, 1909 struct sk_buff *skb,
1910 unsigned int order) 1910 unsigned int order)
1911 { 1911 {
1912 struct page *page; 1912 struct page *page;
1913 1913
1914 gfp_mask |= __GFP_COLD; 1914 gfp_mask |= __GFP_COLD;
1915 1915
1916 if (!(gfp_mask & __GFP_NOMEMALLOC)) 1916 if (!(gfp_mask & __GFP_NOMEMALLOC))
1917 gfp_mask |= __GFP_MEMALLOC; 1917 gfp_mask |= __GFP_MEMALLOC;
1918 1918
1919 page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); 1919 page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
1920 if (skb && page && page->pfmemalloc) 1920 if (skb && page && page->pfmemalloc)
1921 skb->pfmemalloc = true; 1921 skb->pfmemalloc = true;
1922 1922
1923 return page; 1923 return page;
1924 } 1924 }
1925 1925
1926 /** 1926 /**
1927 * __skb_alloc_page - allocate a page for ps-rx for a given skb and preserve pfmemalloc data 1927 * __skb_alloc_page - allocate a page for ps-rx for a given skb and preserve pfmemalloc data
1928 * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX 1928 * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX
1929 * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used 1929 * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used
1930 * 1930 *
1931 * Allocate a new page. 1931 * Allocate a new page.
1932 * 1932 *
1933 * %NULL is returned if there is no free memory. 1933 * %NULL is returned if there is no free memory.
1934 */ 1934 */
1935 static inline struct page *__skb_alloc_page(gfp_t gfp_mask, 1935 static inline struct page *__skb_alloc_page(gfp_t gfp_mask,
1936 struct sk_buff *skb) 1936 struct sk_buff *skb)
1937 { 1937 {
1938 return __skb_alloc_pages(gfp_mask, skb, 0); 1938 return __skb_alloc_pages(gfp_mask, skb, 0);
1939 } 1939 }
1940 1940
1941 /** 1941 /**
1942 * skb_propagate_pfmemalloc - Propagate pfmemalloc if skb is allocated after RX page 1942 * skb_propagate_pfmemalloc - Propagate pfmemalloc if skb is allocated after RX page
1943 * @page: The page that was allocated from skb_alloc_page 1943 * @page: The page that was allocated from skb_alloc_page
1944 * @skb: The skb that may need pfmemalloc set 1944 * @skb: The skb that may need pfmemalloc set
1945 */ 1945 */
1946 static inline void skb_propagate_pfmemalloc(struct page *page, 1946 static inline void skb_propagate_pfmemalloc(struct page *page,
1947 struct sk_buff *skb) 1947 struct sk_buff *skb)
1948 { 1948 {
1949 if (page && page->pfmemalloc) 1949 if (page && page->pfmemalloc)
1950 skb->pfmemalloc = true; 1950 skb->pfmemalloc = true;
1951 } 1951 }
1952 1952
1953 /** 1953 /**
1954 * skb_frag_page - retrieve the page referred to by a paged fragment 1954 * skb_frag_page - retrieve the page referred to by a paged fragment
1955 * @frag: the paged fragment 1955 * @frag: the paged fragment
1956 * 1956 *
1957 * Returns the &struct page associated with @frag. 1957 * Returns the &struct page associated with @frag.
1958 */ 1958 */
1959 static inline struct page *skb_frag_page(const skb_frag_t *frag) 1959 static inline struct page *skb_frag_page(const skb_frag_t *frag)
1960 { 1960 {
1961 return frag->page.p; 1961 return frag->page.p;
1962 } 1962 }
1963 1963
1964 /** 1964 /**
1965 * __skb_frag_ref - take an additional reference on a paged fragment. 1965 * __skb_frag_ref - take an additional reference on a paged fragment.
1966 * @frag: the paged fragment 1966 * @frag: the paged fragment
1967 * 1967 *
1968 * Takes an additional reference on the paged fragment @frag. 1968 * Takes an additional reference on the paged fragment @frag.
1969 */ 1969 */
1970 static inline void __skb_frag_ref(skb_frag_t *frag) 1970 static inline void __skb_frag_ref(skb_frag_t *frag)
1971 { 1971 {
1972 get_page(skb_frag_page(frag)); 1972 get_page(skb_frag_page(frag));
1973 } 1973 }
1974 1974
1975 /** 1975 /**
1976 * skb_frag_ref - take an additional reference on a paged fragment of an skb. 1976 * skb_frag_ref - take an additional reference on a paged fragment of an skb.
1977 * @skb: the buffer 1977 * @skb: the buffer
1978 * @f: the fragment offset. 1978 * @f: the fragment offset.
1979 * 1979 *
1980 * Takes an additional reference on the @f'th paged fragment of @skb. 1980 * Takes an additional reference on the @f'th paged fragment of @skb.
1981 */ 1981 */
1982 static inline void skb_frag_ref(struct sk_buff *skb, int f) 1982 static inline void skb_frag_ref(struct sk_buff *skb, int f)
1983 { 1983 {
1984 __skb_frag_ref(&skb_shinfo(skb)->frags[f]); 1984 __skb_frag_ref(&skb_shinfo(skb)->frags[f]);
1985 } 1985 }
1986 1986
1987 /** 1987 /**
1988 * __skb_frag_unref - release a reference on a paged fragment. 1988 * __skb_frag_unref - release a reference on a paged fragment.
1989 * @frag: the paged fragment 1989 * @frag: the paged fragment
1990 * 1990 *
1991 * Releases a reference on the paged fragment @frag. 1991 * Releases a reference on the paged fragment @frag.
1992 */ 1992 */
1993 static inline void __skb_frag_unref(skb_frag_t *frag) 1993 static inline void __skb_frag_unref(skb_frag_t *frag)
1994 { 1994 {
1995 put_page(skb_frag_page(frag)); 1995 put_page(skb_frag_page(frag));
1996 } 1996 }
1997 1997
1998 /** 1998 /**
1999 * skb_frag_unref - release a reference on a paged fragment of an skb. 1999 * skb_frag_unref - release a reference on a paged fragment of an skb.
2000 * @skb: the buffer 2000 * @skb: the buffer
2001 * @f: the fragment offset 2001 * @f: the fragment offset
2002 * 2002 *
2003 * Releases a reference on the @f'th paged fragment of @skb. 2003 * Releases a reference on the @f'th paged fragment of @skb.
2004 */ 2004 */
2005 static inline void skb_frag_unref(struct sk_buff *skb, int f) 2005 static inline void skb_frag_unref(struct sk_buff *skb, int f)
2006 { 2006 {
2007 __skb_frag_unref(&skb_shinfo(skb)->frags[f]); 2007 __skb_frag_unref(&skb_shinfo(skb)->frags[f]);
2008 } 2008 }
2009 2009
2010 /** 2010 /**
2011 * skb_frag_address - gets the address of the data contained in a paged fragment 2011 * skb_frag_address - gets the address of the data contained in a paged fragment
2012 * @frag: the paged fragment buffer 2012 * @frag: the paged fragment buffer
2013 * 2013 *
2014 * Returns the address of the data within @frag. The page must already 2014 * Returns the address of the data within @frag. The page must already
2015 * be mapped. 2015 * be mapped.
2016 */ 2016 */
2017 static inline void *skb_frag_address(const skb_frag_t *frag) 2017 static inline void *skb_frag_address(const skb_frag_t *frag)
2018 { 2018 {
2019 return page_address(skb_frag_page(frag)) + frag->page_offset; 2019 return page_address(skb_frag_page(frag)) + frag->page_offset;
2020 } 2020 }
2021 2021
2022 /** 2022 /**
2023 * skb_frag_address_safe - gets the address of the data contained in a paged fragment 2023 * skb_frag_address_safe - gets the address of the data contained in a paged fragment
2024 * @frag: the paged fragment buffer 2024 * @frag: the paged fragment buffer
2025 * 2025 *
2026 * Returns the address of the data within @frag. Checks that the page 2026 * Returns the address of the data within @frag. Checks that the page
2027 * is mapped and returns %NULL otherwise. 2027 * is mapped and returns %NULL otherwise.
2028 */ 2028 */
2029 static inline void *skb_frag_address_safe(const skb_frag_t *frag) 2029 static inline void *skb_frag_address_safe(const skb_frag_t *frag)
2030 { 2030 {
2031 void *ptr = page_address(skb_frag_page(frag)); 2031 void *ptr = page_address(skb_frag_page(frag));
2032 if (unlikely(!ptr)) 2032 if (unlikely(!ptr))
2033 return NULL; 2033 return NULL;
2034 2034
2035 return ptr + frag->page_offset; 2035 return ptr + frag->page_offset;
2036 } 2036 }
2037 2037
2038 /** 2038 /**
2039 * __skb_frag_set_page - sets the page contained in a paged fragment 2039 * __skb_frag_set_page - sets the page contained in a paged fragment
2040 * @frag: the paged fragment 2040 * @frag: the paged fragment
2041 * @page: the page to set 2041 * @page: the page to set
2042 * 2042 *
2043 * Sets the fragment @frag to contain @page. 2043 * Sets the fragment @frag to contain @page.
2044 */ 2044 */
2045 static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page) 2045 static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page)
2046 { 2046 {
2047 frag->page.p = page; 2047 frag->page.p = page;
2048 } 2048 }
2049 2049
2050 /** 2050 /**
2051 * skb_frag_set_page - sets the page contained in a paged fragment of an skb 2051 * skb_frag_set_page - sets the page contained in a paged fragment of an skb
2052 * @skb: the buffer 2052 * @skb: the buffer
2053 * @f: the fragment offset 2053 * @f: the fragment offset
2054 * @page: the page to set 2054 * @page: the page to set
2055 * 2055 *
2056 * Sets the @f'th fragment of @skb to contain @page. 2056 * Sets the @f'th fragment of @skb to contain @page.
2057 */ 2057 */
2058 static inline void skb_frag_set_page(struct sk_buff *skb, int f, 2058 static inline void skb_frag_set_page(struct sk_buff *skb, int f,
2059 struct page *page) 2059 struct page *page)
2060 { 2060 {
2061 __skb_frag_set_page(&skb_shinfo(skb)->frags[f], page); 2061 __skb_frag_set_page(&skb_shinfo(skb)->frags[f], page);
2062 } 2062 }
2063 2063
2064 bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); 2064 bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio);
2065 2065
2066 /** 2066 /**
2067 * skb_frag_dma_map - maps a paged fragment via the DMA API 2067 * skb_frag_dma_map - maps a paged fragment via the DMA API
2068 * @dev: the device to map the fragment to 2068 * @dev: the device to map the fragment to
2069 * @frag: the paged fragment to map 2069 * @frag: the paged fragment to map
2070 * @offset: the offset within the fragment (starting at the 2070 * @offset: the offset within the fragment (starting at the
2071 * fragment's own offset) 2071 * fragment's own offset)
2072 * @size: the number of bytes to map 2072 * @size: the number of bytes to map
2073 * @dir: the direction of the mapping (%PCI_DMA_*) 2073 * @dir: the direction of the mapping (%PCI_DMA_*)
2074 * 2074 *
2075 * Maps the page associated with @frag to @dev. 2075 * Maps the page associated with @frag to @dev.
2076 */ 2076 */
2077 static inline dma_addr_t skb_frag_dma_map(struct device *dev, 2077 static inline dma_addr_t skb_frag_dma_map(struct device *dev,
2078 const skb_frag_t *frag, 2078 const skb_frag_t *frag,
2079 size_t offset, size_t size, 2079 size_t offset, size_t size,
2080 enum dma_data_direction dir) 2080 enum dma_data_direction dir)
2081 { 2081 {
2082 return dma_map_page(dev, skb_frag_page(frag), 2082 return dma_map_page(dev, skb_frag_page(frag),
2083 frag->page_offset + offset, size, dir); 2083 frag->page_offset + offset, size, dir);
2084 } 2084 }
2085 2085
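A hedged example of the intended use in a transmit path: map one paged fragment for DMA and check the result. The function name and the zero-as-error convention are assumptions of the sketch; real drivers track descriptors and unwind all previously mapped fragments on failure.

/* Sketch: DMA-map fragment @i of @skb for transmission. */
static dma_addr_t example_map_tx_frag(struct device *dev,
				      struct sk_buff *skb, int i)
{
	const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
	dma_addr_t addr;

	addr = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag),
				DMA_TO_DEVICE);
	if (dma_mapping_error(dev, addr))
		return 0;	/* caller unmaps already-mapped frags */
	return addr;
}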
2086 static inline struct sk_buff *pskb_copy(struct sk_buff *skb, 2086 static inline struct sk_buff *pskb_copy(struct sk_buff *skb,
2087 gfp_t gfp_mask) 2087 gfp_t gfp_mask)
2088 { 2088 {
2089 return __pskb_copy(skb, skb_headroom(skb), gfp_mask); 2089 return __pskb_copy(skb, skb_headroom(skb), gfp_mask);
2090 } 2090 }
2091 2091
2092 /** 2092 /**
2093 * skb_clone_writable - is the header of a clone writable 2093 * skb_clone_writable - is the header of a clone writable
2094 * @skb: buffer to check 2094 * @skb: buffer to check
2095 * @len: length up to which to write 2095 * @len: length up to which to write
2096 * 2096 *
2097 * Returns true if modifying the header part of the cloned buffer 2097 * Returns true if modifying the header part of the cloned buffer
2098 * does not require the data to be copied. 2098 * does not require the data to be copied.
2099 */ 2099 */
2100 static inline int skb_clone_writable(const struct sk_buff *skb, unsigned int len) 2100 static inline int skb_clone_writable(const struct sk_buff *skb, unsigned int len)
2101 { 2101 {
2102 return !skb_header_cloned(skb) && 2102 return !skb_header_cloned(skb) &&
2103 skb_headroom(skb) + len <= skb->hdr_len; 2103 skb_headroom(skb) + len <= skb->hdr_len;
2104 } 2104 }
2105 2105
2106 static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom, 2106 static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom,
2107 int cloned) 2107 int cloned)
2108 { 2108 {
2109 int delta = 0; 2109 int delta = 0;
2110 2110
2111 if (headroom > skb_headroom(skb)) 2111 if (headroom > skb_headroom(skb))
2112 delta = headroom - skb_headroom(skb); 2112 delta = headroom - skb_headroom(skb);
2113 2113
2114 if (delta || cloned) 2114 if (delta || cloned)
2115 return pskb_expand_head(skb, ALIGN(delta, NET_SKB_PAD), 0, 2115 return pskb_expand_head(skb, ALIGN(delta, NET_SKB_PAD), 0,
2116 GFP_ATOMIC); 2116 GFP_ATOMIC);
2117 return 0; 2117 return 0;
2118 } 2118 }
2119 2119
2120 /** 2120 /**
2121 * skb_cow - copy header of skb when it is required 2121 * skb_cow - copy header of skb when it is required
2122 * @skb: buffer to cow 2122 * @skb: buffer to cow
2123 * @headroom: needed headroom 2123 * @headroom: needed headroom
2124 * 2124 *
2125 * If the skb passed lacks sufficient headroom or its data part 2125 * If the skb passed lacks sufficient headroom or its data part
2126 * is shared, data is reallocated. If reallocation fails, an error 2126 * is shared, data is reallocated. If reallocation fails, an error
2127 * is returned and the original skb is not changed. 2127 * is returned and the original skb is not changed.
2128 * 2128 *
2129 * The result is skb with writable area skb->head...skb->tail 2129 * The result is skb with writable area skb->head...skb->tail
2130 * and at least @headroom of space at head. 2130 * and at least @headroom of space at head.
2131 */ 2131 */
2132 static inline int skb_cow(struct sk_buff *skb, unsigned int headroom) 2132 static inline int skb_cow(struct sk_buff *skb, unsigned int headroom)
2133 { 2133 {
2134 return __skb_cow(skb, headroom, skb_cloned(skb)); 2134 return __skb_cow(skb, headroom, skb_cloned(skb));
2135 } 2135 }
2136 2136
2137 /** 2137 /**
2138 * skb_cow_head - skb_cow but only making the head writable 2138 * skb_cow_head - skb_cow but only making the head writable
2139 * @skb: buffer to cow 2139 * @skb: buffer to cow
2140 * @headroom: needed headroom 2140 * @headroom: needed headroom
2141 * 2141 *
2142 * This function is identical to skb_cow except that we replace the 2142 * This function is identical to skb_cow except that we replace the
2143 * skb_cloned check by skb_header_cloned. It should be used when 2143 * skb_cloned check by skb_header_cloned. It should be used when
2144 * you only need to push on some header and do not need to modify 2144 * you only need to push on some header and do not need to modify
2145 * the data. 2145 * the data.
2146 */ 2146 */
2147 static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom) 2147 static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom)
2148 { 2148 {
2149 return __skb_cow(skb, headroom, skb_header_cloned(skb)); 2149 return __skb_cow(skb, headroom, skb_header_cloned(skb));
2150 } 2150 }
2151 2151
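As a brief illustration (names invented for the example), encapsulation code that only needs to prepend a header would pair skb_cow_head() with skb_push():

/* Sketch: make sure @hlen bytes of writable headroom exist, then
 * prepend a zeroed header of that size.
 */
static int example_push_header(struct sk_buff *skb, unsigned int hlen)
{
	int err = skb_cow_head(skb, hlen);

	if (err)
		return err;			/* reallocation failed */
	memset(skb_push(skb, hlen), 0, hlen);	/* header fill omitted */
	return 0;
}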
2152 /** 2152 /**
2153 * skb_padto - pad an skbuff up to a minimal size 2153 * skb_padto - pad an skbuff up to a minimal size
2154 * @skb: buffer to pad 2154 * @skb: buffer to pad
2155 * @len: minimal length 2155 * @len: minimal length
2156 * 2156 *
2157 * Pads up a buffer to ensure the trailing bytes exist and are 2157 * Pads up a buffer to ensure the trailing bytes exist and are
2158 * blanked. If the buffer already contains sufficient data it 2158 * blanked. If the buffer already contains sufficient data it
2159 * is untouched. Otherwise it is extended. Returns zero on 2159 * is untouched. Otherwise it is extended. Returns zero on
2160 * success. The skb is freed on error. 2160 * success. The skb is freed on error.
2161 */ 2161 */
2162 2162
2163 static inline int skb_padto(struct sk_buff *skb, unsigned int len) 2163 static inline int skb_padto(struct sk_buff *skb, unsigned int len)
2164 { 2164 {
2165 unsigned int size = skb->len; 2165 unsigned int size = skb->len;
2166 if (likely(size >= len)) 2166 if (likely(size >= len))
2167 return 0; 2167 return 0;
2168 return skb_pad(skb, len - size); 2168 return skb_pad(skb, len - size);
2169 } 2169 }
2170 2170
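A short, hedged example of the usual transmit-side call; note the documented quirk that the skb is already freed when skb_padto() fails, so the caller must not touch it afterwards. ETH_ZLEN comes from <linux/if_ether.h> and is used here purely as an example minimum.

/* Sketch: pad a frame to the Ethernet minimum before handing it to
 * hardware that does not pad on its own.
 */
static int example_pad_min_frame(struct sk_buff *skb)
{
	if (skb_padto(skb, ETH_ZLEN))
		return -ENOMEM;		/* skb was freed by skb_padto() */
	return 0;
}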
2171 static inline int skb_add_data(struct sk_buff *skb, 2171 static inline int skb_add_data(struct sk_buff *skb,
2172 char __user *from, int copy) 2172 char __user *from, int copy)
2173 { 2173 {
2174 const int off = skb->len; 2174 const int off = skb->len;
2175 2175
2176 if (skb->ip_summed == CHECKSUM_NONE) { 2176 if (skb->ip_summed == CHECKSUM_NONE) {
2177 int err = 0; 2177 int err = 0;
2178 __wsum csum = csum_and_copy_from_user(from, skb_put(skb, copy), 2178 __wsum csum = csum_and_copy_from_user(from, skb_put(skb, copy),
2179 copy, 0, &err); 2179 copy, 0, &err);
2180 if (!err) { 2180 if (!err) {
2181 skb->csum = csum_block_add(skb->csum, csum, off); 2181 skb->csum = csum_block_add(skb->csum, csum, off);
2182 return 0; 2182 return 0;
2183 } 2183 }
2184 } else if (!copy_from_user(skb_put(skb, copy), from, copy)) 2184 } else if (!copy_from_user(skb_put(skb, copy), from, copy))
2185 return 0; 2185 return 0;
2186 2186
2187 __skb_trim(skb, off); 2187 __skb_trim(skb, off);
2188 return -EFAULT; 2188 return -EFAULT;
2189 } 2189 }
2190 2190
2191 static inline bool skb_can_coalesce(struct sk_buff *skb, int i, 2191 static inline bool skb_can_coalesce(struct sk_buff *skb, int i,
2192 const struct page *page, int off) 2192 const struct page *page, int off)
2193 { 2193 {
2194 if (i) { 2194 if (i) {
2195 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1]; 2195 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
2196 2196
2197 return page == skb_frag_page(frag) && 2197 return page == skb_frag_page(frag) &&
2198 off == frag->page_offset + skb_frag_size(frag); 2198 off == frag->page_offset + skb_frag_size(frag);
2199 } 2199 }
2200 return false; 2200 return false;
2201 } 2201 }
2202 2202
2203 static inline int __skb_linearize(struct sk_buff *skb) 2203 static inline int __skb_linearize(struct sk_buff *skb)
2204 { 2204 {
2205 return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM; 2205 return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM;
2206 } 2206 }
2207 2207
2208 /** 2208 /**
2209 * skb_linearize - convert paged skb to linear one 2209 * skb_linearize - convert paged skb to linear one
2210 * @skb: buffer to linearize 2210 * @skb: buffer to linearize
2211 * 2211 *
2212 * If there is no free memory -ENOMEM is returned, otherwise zero 2212 * If there is no free memory -ENOMEM is returned, otherwise zero
2213 * is returned and the old skb data released. 2213 * is returned and the old skb data released.
2214 */ 2214 */
2215 static inline int skb_linearize(struct sk_buff *skb) 2215 static inline int skb_linearize(struct sk_buff *skb)
2216 { 2216 {
2217 return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0; 2217 return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0;
2218 } 2218 }
2219 2219
2220 /** 2220 /**
2221 * skb_has_shared_frag - can any frag be overwritten 2221 * skb_has_shared_frag - can any frag be overwritten
2222 * @skb: buffer to test 2222 * @skb: buffer to test
2223 * 2223 *
2224 * Return true if the skb has at least one frag that might be modified 2224 * Return true if the skb has at least one frag that might be modified
2225 * by an external entity (as in vmsplice()/sendfile()). 2225 * by an external entity (as in vmsplice()/sendfile()).
2226 */ 2226 */
2227 static inline bool skb_has_shared_frag(const struct sk_buff *skb) 2227 static inline bool skb_has_shared_frag(const struct sk_buff *skb)
2228 { 2228 {
2229 return skb_is_nonlinear(skb) && 2229 return skb_is_nonlinear(skb) &&
2230 skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; 2230 skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
2231 } 2231 }
2232 2232
2233 /** 2233 /**
2234 * skb_linearize_cow - make sure skb is linear and writable 2234 * skb_linearize_cow - make sure skb is linear and writable
2235 * @skb: buffer to process 2235 * @skb: buffer to process
2236 * 2236 *
2237 * If there is no free memory -ENOMEM is returned, otherwise zero 2237 * If there is no free memory -ENOMEM is returned, otherwise zero
2238 * is returned and the old skb data released. 2238 * is returned and the old skb data released.
2239 */ 2239 */
2240 static inline int skb_linearize_cow(struct sk_buff *skb) 2240 static inline int skb_linearize_cow(struct sk_buff *skb)
2241 { 2241 {
2242 return skb_is_nonlinear(skb) || skb_cloned(skb) ? 2242 return skb_is_nonlinear(skb) || skb_cloned(skb) ?
2243 __skb_linearize(skb) : 0; 2243 __skb_linearize(skb) : 0;
2244 } 2244 }
2245 2245
2246 /** 2246 /**
2247 * skb_postpull_rcsum - update checksum for received skb after pull 2247 * skb_postpull_rcsum - update checksum for received skb after pull
2248 * @skb: buffer to update 2248 * @skb: buffer to update
2249 * @start: start of data before pull 2249 * @start: start of data before pull
2250 * @len: length of data pulled 2250 * @len: length of data pulled
2251 * 2251 *
2252 * After doing a pull on a received packet, you need to call this to 2252 * After doing a pull on a received packet, you need to call this to
2253 * update the CHECKSUM_COMPLETE checksum, or set ip_summed to 2253 * update the CHECKSUM_COMPLETE checksum, or set ip_summed to
2254 * CHECKSUM_NONE so that it can be recomputed from scratch. 2254 * CHECKSUM_NONE so that it can be recomputed from scratch.
2255 */ 2255 */
2256 2256
2257 static inline void skb_postpull_rcsum(struct sk_buff *skb, 2257 static inline void skb_postpull_rcsum(struct sk_buff *skb,
2258 const void *start, unsigned int len) 2258 const void *start, unsigned int len)
2259 { 2259 {
2260 if (skb->ip_summed == CHECKSUM_COMPLETE) 2260 if (skb->ip_summed == CHECKSUM_COMPLETE)
2261 skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); 2261 skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0));
2262 } 2262 }
2263 2263
2264 unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); 2264 unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
2265 2265
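For illustration, the open-coded pattern that the exported skb_pull_rcsum() roughly wraps looks like this; ETH_HLEN and the function name are assumptions of the sketch.

/* Sketch: pull the Ethernet header and keep a CHECKSUM_COMPLETE
 * value consistent with the shorter packet.
 */
static void example_pull_eth_header(struct sk_buff *skb)
{
	const void *start = skb->data;

	skb_pull(skb, ETH_HLEN);
	skb_postpull_rcsum(skb, start, ETH_HLEN);
}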
2266 /** 2266 /**
2267 * pskb_trim_rcsum - trim received skb and update checksum 2267 * pskb_trim_rcsum - trim received skb and update checksum
2268 * @skb: buffer to trim 2268 * @skb: buffer to trim
2269 * @len: new length 2269 * @len: new length
2270 * 2270 *
2271 * This is exactly the same as pskb_trim except that it ensures the 2271 * This is exactly the same as pskb_trim except that it ensures the
2272 * checksums of received packets are still valid after the operation. 2272 * checksums of received packets are still valid after the operation.
2273 */ 2273 */
2274 2274
2275 static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) 2275 static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
2276 { 2276 {
2277 if (likely(len >= skb->len)) 2277 if (likely(len >= skb->len))
2278 return 0; 2278 return 0;
2279 if (skb->ip_summed == CHECKSUM_COMPLETE) 2279 if (skb->ip_summed == CHECKSUM_COMPLETE)
2280 skb->ip_summed = CHECKSUM_NONE; 2280 skb->ip_summed = CHECKSUM_NONE;
2281 return __pskb_trim(skb, len); 2281 return __pskb_trim(skb, len);
2282 } 2282 }
2283 2283
2284 #define skb_queue_walk(queue, skb) \ 2284 #define skb_queue_walk(queue, skb) \
2285 for (skb = (queue)->next; \ 2285 for (skb = (queue)->next; \
2286 skb != (struct sk_buff *)(queue); \ 2286 skb != (struct sk_buff *)(queue); \
2287 skb = skb->next) 2287 skb = skb->next)
2288 2288
2289 #define skb_queue_walk_safe(queue, skb, tmp) \ 2289 #define skb_queue_walk_safe(queue, skb, tmp) \
2290 for (skb = (queue)->next, tmp = skb->next; \ 2290 for (skb = (queue)->next, tmp = skb->next; \
2291 skb != (struct sk_buff *)(queue); \ 2291 skb != (struct sk_buff *)(queue); \
2292 skb = tmp, tmp = skb->next) 2292 skb = tmp, tmp = skb->next)
2293 2293
2294 #define skb_queue_walk_from(queue, skb) \ 2294 #define skb_queue_walk_from(queue, skb) \
2295 for (; skb != (struct sk_buff *)(queue); \ 2295 for (; skb != (struct sk_buff *)(queue); \
2296 skb = skb->next) 2296 skb = skb->next)
2297 2297
2298 #define skb_queue_walk_from_safe(queue, skb, tmp) \ 2298 #define skb_queue_walk_from_safe(queue, skb, tmp) \
2299 for (tmp = skb->next; \ 2299 for (tmp = skb->next; \
2300 skb != (struct sk_buff *)(queue); \ 2300 skb != (struct sk_buff *)(queue); \
2301 skb = tmp, tmp = skb->next) 2301 skb = tmp, tmp = skb->next)
2302 2302
2303 #define skb_queue_reverse_walk(queue, skb) \ 2303 #define skb_queue_reverse_walk(queue, skb) \
2304 for (skb = (queue)->prev; \ 2304 for (skb = (queue)->prev; \
2305 skb != (struct sk_buff *)(queue); \ 2305 skb != (struct sk_buff *)(queue); \
2306 skb = skb->prev) 2306 skb = skb->prev)
2307 2307
2308 #define skb_queue_reverse_walk_safe(queue, skb, tmp) \ 2308 #define skb_queue_reverse_walk_safe(queue, skb, tmp) \
2309 for (skb = (queue)->prev, tmp = skb->prev; \ 2309 for (skb = (queue)->prev, tmp = skb->prev; \
2310 skb != (struct sk_buff *)(queue); \ 2310 skb != (struct sk_buff *)(queue); \
2311 skb = tmp, tmp = skb->prev) 2311 skb = tmp, tmp = skb->prev)
2312 2312
2313 #define skb_queue_reverse_walk_from_safe(queue, skb, tmp) \ 2313 #define skb_queue_reverse_walk_from_safe(queue, skb, tmp) \
2314 for (tmp = skb->prev; \ 2314 for (tmp = skb->prev; \
2315 skb != (struct sk_buff *)(queue); \ 2315 skb != (struct sk_buff *)(queue); \
2316 skb = tmp, tmp = skb->prev) 2316 skb = tmp, tmp = skb->prev)
2317 2317
2318 static inline bool skb_has_frag_list(const struct sk_buff *skb) 2318 static inline bool skb_has_frag_list(const struct sk_buff *skb)
2319 { 2319 {
2320 return skb_shinfo(skb)->frag_list != NULL; 2320 return skb_shinfo(skb)->frag_list != NULL;
2321 } 2321 }
2322 2322
2323 static inline void skb_frag_list_init(struct sk_buff *skb) 2323 static inline void skb_frag_list_init(struct sk_buff *skb)
2324 { 2324 {
2325 skb_shinfo(skb)->frag_list = NULL; 2325 skb_shinfo(skb)->frag_list = NULL;
2326 } 2326 }
2327 2327
2328 static inline void skb_frag_add_head(struct sk_buff *skb, struct sk_buff *frag) 2328 static inline void skb_frag_add_head(struct sk_buff *skb, struct sk_buff *frag)
2329 { 2329 {
2330 frag->next = skb_shinfo(skb)->frag_list; 2330 frag->next = skb_shinfo(skb)->frag_list;
2331 skb_shinfo(skb)->frag_list = frag; 2331 skb_shinfo(skb)->frag_list = frag;
2332 } 2332 }
2333 2333
2334 #define skb_walk_frags(skb, iter) \ 2334 #define skb_walk_frags(skb, iter) \
2335 for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next) 2335 for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next)
2336 2336
2337 struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, 2337 struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
2338 int *peeked, int *off, int *err); 2338 int *peeked, int *off, int *err);
2339 struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, 2339 struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
2340 int *err); 2340 int *err);
2341 unsigned int datagram_poll(struct file *file, struct socket *sock, 2341 unsigned int datagram_poll(struct file *file, struct socket *sock,
2342 struct poll_table_struct *wait); 2342 struct poll_table_struct *wait);
2343 int skb_copy_datagram_iovec(const struct sk_buff *from, int offset, 2343 int skb_copy_datagram_iovec(const struct sk_buff *from, int offset,
2344 struct iovec *to, int size); 2344 struct iovec *to, int size);
2345 int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen, 2345 int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen,
2346 struct iovec *iov); 2346 struct iovec *iov);
2347 int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, 2347 int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
2348 const struct iovec *from, int from_offset, 2348 const struct iovec *from, int from_offset,
2349 int len); 2349 int len);
2350 int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *frm, 2350 int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *frm,
2351 int offset, size_t count); 2351 int offset, size_t count);
2352 int skb_copy_datagram_const_iovec(const struct sk_buff *from, int offset, 2352 int skb_copy_datagram_const_iovec(const struct sk_buff *from, int offset,
2353 const struct iovec *to, int to_offset, 2353 const struct iovec *to, int to_offset,
2354 int size); 2354 int size);
2355 void skb_free_datagram(struct sock *sk, struct sk_buff *skb); 2355 void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
2356 void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb); 2356 void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb);
2357 int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); 2357 int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags);
2358 int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); 2358 int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
2359 int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); 2359 int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
2360 __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, 2360 __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
2361 int len, __wsum csum); 2361 int len, __wsum csum);
2362 int skb_splice_bits(struct sk_buff *skb, unsigned int offset, 2362 int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
2363 struct pipe_inode_info *pipe, unsigned int len, 2363 struct pipe_inode_info *pipe, unsigned int len,
2364 unsigned int flags); 2364 unsigned int flags);
2365 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); 2365 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
2366 void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); 2366 void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len);
2367 int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); 2367 int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
2368 void skb_scrub_packet(struct sk_buff *skb, bool xnet); 2368 void skb_scrub_packet(struct sk_buff *skb, bool xnet);
2369 struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); 2369 struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
2370 2370
2371 struct skb_checksum_ops { 2371 struct skb_checksum_ops {
2372 __wsum (*update)(const void *mem, int len, __wsum wsum); 2372 __wsum (*update)(const void *mem, int len, __wsum wsum);
2373 __wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len); 2373 __wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len);
2374 }; 2374 };
2375 2375
2376 __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, 2376 __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
2377 __wsum csum, const struct skb_checksum_ops *ops); 2377 __wsum csum, const struct skb_checksum_ops *ops);
2378 __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, 2378 __wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
2379 __wsum csum); 2379 __wsum csum);
2380 2380
2381 static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, 2381 static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
2382 int len, void *buffer) 2382 int len, void *buffer)
2383 { 2383 {
2384 int hlen = skb_headlen(skb); 2384 int hlen = skb_headlen(skb);
2385 2385
2386 if (hlen - offset >= len) 2386 if (hlen - offset >= len)
2387 return skb->data + offset; 2387 return skb->data + offset;
2388 2388
2389 if (skb_copy_bits(skb, offset, buffer, len) < 0) 2389 if (skb_copy_bits(skb, offset, buffer, len) < 0)
2390 return NULL; 2390 return NULL;
2391 2391
2392 return buffer; 2392 return buffer;
2393 } 2393 }
2394 2394
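A minimal sketch of the usual calling convention: pass a stack buffer large enough for the header and use whichever pointer comes back, whether it points into the linear area or into the copy. The UDP header (struct udphdr, from <linux/udp.h>) and the transport offset are example choices.

/* Sketch: fetch a UDP header that may be split across fragments. */
static const struct udphdr *example_udp_hdr(const struct sk_buff *skb,
					    struct udphdr *buf)
{
	return skb_header_pointer(skb, skb_transport_offset(skb),
				  sizeof(*buf), buf);
}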
2395 /**
2396 * skb_needs_linearize - check if we need to linearize a given skb
2397 * depending on the given device features.
2398 * @skb: socket buffer to check
2399 * @features: net device features
2400 *
2401 * Returns true if either:
2402 * 1. skb has frag_list and the device doesn't support FRAGLIST, or
2403 * 2. skb is fragmented and the device does not support SG.
2404 */
2405 static inline bool skb_needs_linearize(struct sk_buff *skb,
2406 netdev_features_t features)
2407 {
2408 return skb_is_nonlinear(skb) &&
2409 ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
2410 (skb_shinfo(skb)->nr_frags && !(features & NETIF_F_SG)));
2411 }
2412
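A hedged sketch of the kind of call site this helper is being exported for (compare its existing user in net/core/dev.c): a transmit path tests the device's feature flags and linearizes only when the skb layout is unsupported. The wrapper name is invented for the example.

/* Sketch: flatten the skb if the device can handle neither its
 * frag_list nor its paged fragments.
 */
static int example_maybe_linearize(struct sk_buff *skb,
				   netdev_features_t features)
{
	if (skb_needs_linearize(skb, features) && __skb_linearize(skb))
		return -ENOMEM;
	return 0;
}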
2395 static inline void skb_copy_from_linear_data(const struct sk_buff *skb, 2413 static inline void skb_copy_from_linear_data(const struct sk_buff *skb,
2396 void *to, 2414 void *to,
2397 const unsigned int len) 2415 const unsigned int len)
2398 { 2416 {
2399 memcpy(to, skb->data, len); 2417 memcpy(to, skb->data, len);
2400 } 2418 }
2401 2419
2402 static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb, 2420 static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb,
2403 const int offset, void *to, 2421 const int offset, void *to,
2404 const unsigned int len) 2422 const unsigned int len)
2405 { 2423 {
2406 memcpy(to, skb->data + offset, len); 2424 memcpy(to, skb->data + offset, len);
2407 } 2425 }
2408 2426
2409 static inline void skb_copy_to_linear_data(struct sk_buff *skb, 2427 static inline void skb_copy_to_linear_data(struct sk_buff *skb,
2410 const void *from, 2428 const void *from,
2411 const unsigned int len) 2429 const unsigned int len)
2412 { 2430 {
2413 memcpy(skb->data, from, len); 2431 memcpy(skb->data, from, len);
2414 } 2432 }
2415 2433
2416 static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb, 2434 static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb,
2417 const int offset, 2435 const int offset,
2418 const void *from, 2436 const void *from,
2419 const unsigned int len) 2437 const unsigned int len)
2420 { 2438 {
2421 memcpy(skb->data + offset, from, len); 2439 memcpy(skb->data + offset, from, len);
2422 } 2440 }
2423 2441
2424 void skb_init(void); 2442 void skb_init(void);
2425 2443
2426 static inline ktime_t skb_get_ktime(const struct sk_buff *skb) 2444 static inline ktime_t skb_get_ktime(const struct sk_buff *skb)
2427 { 2445 {
2428 return skb->tstamp; 2446 return skb->tstamp;
2429 } 2447 }
2430 2448
2431 /** 2449 /**
2432 * skb_get_timestamp - get timestamp from a skb 2450 * skb_get_timestamp - get timestamp from a skb
2433 * @skb: skb to get stamp from 2451 * @skb: skb to get stamp from
2434 * @stamp: pointer to struct timeval to store stamp in 2452 * @stamp: pointer to struct timeval to store stamp in
2435 * 2453 *
2436 * Timestamps are stored in the skb as offsets to a base timestamp. 2454 * Timestamps are stored in the skb as offsets to a base timestamp.
2437 * This function converts the offset back to a struct timeval and stores 2455 * This function converts the offset back to a struct timeval and stores
2438 * it in stamp. 2456 * it in stamp.
2439 */ 2457 */
2440 static inline void skb_get_timestamp(const struct sk_buff *skb, 2458 static inline void skb_get_timestamp(const struct sk_buff *skb,
2441 struct timeval *stamp) 2459 struct timeval *stamp)
2442 { 2460 {
2443 *stamp = ktime_to_timeval(skb->tstamp); 2461 *stamp = ktime_to_timeval(skb->tstamp);
2444 } 2462 }
2445 2463
2446 static inline void skb_get_timestampns(const struct sk_buff *skb, 2464 static inline void skb_get_timestampns(const struct sk_buff *skb,
2447 struct timespec *stamp) 2465 struct timespec *stamp)
2448 { 2466 {
2449 *stamp = ktime_to_timespec(skb->tstamp); 2467 *stamp = ktime_to_timespec(skb->tstamp);
2450 } 2468 }
2451 2469
2452 static inline void __net_timestamp(struct sk_buff *skb) 2470 static inline void __net_timestamp(struct sk_buff *skb)
2453 { 2471 {
2454 skb->tstamp = ktime_get_real(); 2472 skb->tstamp = ktime_get_real();
2455 } 2473 }
2456 2474
2457 static inline ktime_t net_timedelta(ktime_t t) 2475 static inline ktime_t net_timedelta(ktime_t t)
2458 { 2476 {
2459 return ktime_sub(ktime_get_real(), t); 2477 return ktime_sub(ktime_get_real(), t);
2460 } 2478 }
2461 2479
2462 static inline ktime_t net_invalid_timestamp(void) 2480 static inline ktime_t net_invalid_timestamp(void)
2463 { 2481 {
2464 return ktime_set(0, 0); 2482 return ktime_set(0, 0);
2465 } 2483 }
2466 2484
2467 void skb_timestamping_init(void); 2485 void skb_timestamping_init(void);
2468 2486
2469 #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING 2487 #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
2470 2488
2471 void skb_clone_tx_timestamp(struct sk_buff *skb); 2489 void skb_clone_tx_timestamp(struct sk_buff *skb);
2472 bool skb_defer_rx_timestamp(struct sk_buff *skb); 2490 bool skb_defer_rx_timestamp(struct sk_buff *skb);
2473 2491
2474 #else /* CONFIG_NETWORK_PHY_TIMESTAMPING */ 2492 #else /* CONFIG_NETWORK_PHY_TIMESTAMPING */
2475 2493
2476 static inline void skb_clone_tx_timestamp(struct sk_buff *skb) 2494 static inline void skb_clone_tx_timestamp(struct sk_buff *skb)
2477 { 2495 {
2478 } 2496 }
2479 2497
2480 static inline bool skb_defer_rx_timestamp(struct sk_buff *skb) 2498 static inline bool skb_defer_rx_timestamp(struct sk_buff *skb)
2481 { 2499 {
2482 return false; 2500 return false;
2483 } 2501 }
2484 2502
2485 #endif /* !CONFIG_NETWORK_PHY_TIMESTAMPING */ 2503 #endif /* !CONFIG_NETWORK_PHY_TIMESTAMPING */
2486 2504
2487 /** 2505 /**
2488 * skb_complete_tx_timestamp() - deliver cloned skb with tx timestamps 2506 * skb_complete_tx_timestamp() - deliver cloned skb with tx timestamps
2489 * 2507 *
2490 * PHY drivers may accept clones of transmitted packets for 2508 * PHY drivers may accept clones of transmitted packets for
2491 * timestamping via their phy_driver.txtstamp method. These drivers 2509 * timestamping via their phy_driver.txtstamp method. These drivers
2492 * must call this function to return the skb back to the stack, with 2510 * must call this function to return the skb back to the stack, with
2493 * or without a timestamp. 2511 * or without a timestamp.
2494 * 2512 *
2495 * @skb: clone of the original outgoing packet 2513 * @skb: clone of the original outgoing packet
2496 * @hwtstamps: hardware time stamps, may be NULL if not available 2514 * @hwtstamps: hardware time stamps, may be NULL if not available
2497 * 2515 *
2498 */ 2516 */
2499 void skb_complete_tx_timestamp(struct sk_buff *skb, 2517 void skb_complete_tx_timestamp(struct sk_buff *skb,
2500 struct skb_shared_hwtstamps *hwtstamps); 2518 struct skb_shared_hwtstamps *hwtstamps);
2501 2519
2502 /** 2520 /**
2503 * skb_tstamp_tx - queue clone of skb with send time stamps 2521 * skb_tstamp_tx - queue clone of skb with send time stamps
2504 * @orig_skb: the original outgoing packet 2522 * @orig_skb: the original outgoing packet
2505 * @hwtstamps: hardware time stamps, may be NULL if not available 2523 * @hwtstamps: hardware time stamps, may be NULL if not available
2506 * 2524 *
2507 * If the skb has a socket associated, then this function clones the 2525 * If the skb has a socket associated, then this function clones the
2508 * skb (thus sharing the actual data and optional structures), stores 2526 * skb (thus sharing the actual data and optional structures), stores
2509 * the optional hardware time stamping information (if non-NULL) or 2527 * the optional hardware time stamping information (if non-NULL) or
2510 * generates a software time stamp (otherwise), then queues the clone 2528 * generates a software time stamp (otherwise), then queues the clone
2511 * to the error queue of the socket. Errors are silently ignored. 2529 * to the error queue of the socket. Errors are silently ignored.
2512 */ 2530 */
2513 void skb_tstamp_tx(struct sk_buff *orig_skb, 2531 void skb_tstamp_tx(struct sk_buff *orig_skb,
2514 struct skb_shared_hwtstamps *hwtstamps); 2532 struct skb_shared_hwtstamps *hwtstamps);
2515 2533
2516 static inline void sw_tx_timestamp(struct sk_buff *skb) 2534 static inline void sw_tx_timestamp(struct sk_buff *skb)
2517 { 2535 {
2518 if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP && 2536 if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP &&
2519 !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) 2537 !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
2520 skb_tstamp_tx(skb, NULL); 2538 skb_tstamp_tx(skb, NULL);
2521 } 2539 }
2522 2540
2523 /** 2541 /**
2524 * skb_tx_timestamp() - Driver hook for transmit timestamping 2542 * skb_tx_timestamp() - Driver hook for transmit timestamping
2525 * 2543 *
2526 * Ethernet MAC Drivers should call this function in their hard_xmit() 2544 * Ethernet MAC Drivers should call this function in their hard_xmit()
2527 * function immediately before giving the sk_buff to the MAC hardware. 2545 * function immediately before giving the sk_buff to the MAC hardware.
2528 * 2546 *
2529 * @skb: A socket buffer. 2547 * @skb: A socket buffer.
2530 */ 2548 */
2531 static inline void skb_tx_timestamp(struct sk_buff *skb) 2549 static inline void skb_tx_timestamp(struct sk_buff *skb)
2532 { 2550 {
2533 skb_clone_tx_timestamp(skb); 2551 skb_clone_tx_timestamp(skb);
2534 sw_tx_timestamp(skb); 2552 sw_tx_timestamp(skb);
2535 } 2553 }
2536 2554
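As a brief illustration of the placement requirement, a hypothetical ndo_start_xmit() implementation (types from <linux/netdevice.h>, everything driver-specific elided) calls the hook just before the descriptor is handed to the hardware.

/* Sketch only: the timestamp hook goes after the skb can no longer
 * fail software checks, immediately before hardware submission.
 */
static netdev_tx_t example_start_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	skb_tx_timestamp(skb);
	/* ... post @skb to the TX ring here ... */
	return NETDEV_TX_OK;
}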
2537 /** 2555 /**
2538 * skb_complete_wifi_ack - deliver skb with wifi status 2556 * skb_complete_wifi_ack - deliver skb with wifi status
2539 * 2557 *
2540 * @skb: the original outgoing packet 2558 * @skb: the original outgoing packet
2541 * @acked: ack status 2559 * @acked: ack status
2542 * 2560 *
2543 */ 2561 */
2544 void skb_complete_wifi_ack(struct sk_buff *skb, bool acked); 2562 void skb_complete_wifi_ack(struct sk_buff *skb, bool acked);
2545 2563
2546 __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len); 2564 __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
2547 __sum16 __skb_checksum_complete(struct sk_buff *skb); 2565 __sum16 __skb_checksum_complete(struct sk_buff *skb);
2548 2566
2549 static inline int skb_csum_unnecessary(const struct sk_buff *skb) 2567 static inline int skb_csum_unnecessary(const struct sk_buff *skb)
2550 { 2568 {
2551 return skb->ip_summed & CHECKSUM_UNNECESSARY; 2569 return skb->ip_summed & CHECKSUM_UNNECESSARY;
2552 } 2570 }
2553 2571
2554 /** 2572 /**
2555 * skb_checksum_complete - Calculate checksum of an entire packet 2573 * skb_checksum_complete - Calculate checksum of an entire packet
2556 * @skb: packet to process 2574 * @skb: packet to process
2557 * 2575 *
2558 * This function calculates the checksum over the entire packet plus 2576 * This function calculates the checksum over the entire packet plus
2559 * the value of skb->csum. The latter can be used to supply the 2577 * the value of skb->csum. The latter can be used to supply the
2560 * checksum of a pseudo header as used by TCP/UDP. It returns the 2578 * checksum of a pseudo header as used by TCP/UDP. It returns the
2561 * checksum. 2579 * checksum.
2562 * 2580 *
2563 * For protocols that contain complete checksums such as ICMP/TCP/UDP, 2581 * For protocols that contain complete checksums such as ICMP/TCP/UDP,
2564 * this function can be used to verify the checksum on received 2582 * this function can be used to verify the checksum on received
2565 * packets. In that case the function should return zero if the 2583 * packets. In that case the function should return zero if the
2566 * checksum is correct. In particular, this function will return zero 2584 * checksum is correct. In particular, this function will return zero
2567 * if skb->ip_summed is CHECKSUM_UNNECESSARY which indicates that the 2585 * if skb->ip_summed is CHECKSUM_UNNECESSARY which indicates that the
2568 * hardware has already verified the correctness of the checksum. 2586 * hardware has already verified the correctness of the checksum.
2569 */ 2587 */
2570 static inline __sum16 skb_checksum_complete(struct sk_buff *skb) 2588 static inline __sum16 skb_checksum_complete(struct sk_buff *skb)
2571 { 2589 {
2572 return skb_csum_unnecessary(skb) ? 2590 return skb_csum_unnecessary(skb) ?
2573 0 : __skb_checksum_complete(skb); 2591 0 : __skb_checksum_complete(skb);
2574 } 2592 }
2575 2593
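A short sketch of how a protocol receive routine typically consumes this; the helper name is invented, and any pseudo-header checksum is assumed to have been folded into skb->csum already.

/* Sketch: returns true when the packet checksum verifies, either by
 * software summation or because hardware already validated it
 * (CHECKSUM_UNNECESSARY).
 */
static bool example_checksum_ok(struct sk_buff *skb)
{
	return skb_checksum_complete(skb) == 0;
}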
2576 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 2594 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
2577 void nf_conntrack_destroy(struct nf_conntrack *nfct); 2595 void nf_conntrack_destroy(struct nf_conntrack *nfct);
2578 static inline void nf_conntrack_put(struct nf_conntrack *nfct) 2596 static inline void nf_conntrack_put(struct nf_conntrack *nfct)
2579 { 2597 {
2580 if (nfct && atomic_dec_and_test(&nfct->use)) 2598 if (nfct && atomic_dec_and_test(&nfct->use))
2581 nf_conntrack_destroy(nfct); 2599 nf_conntrack_destroy(nfct);
2582 } 2600 }
2583 static inline void nf_conntrack_get(struct nf_conntrack *nfct) 2601 static inline void nf_conntrack_get(struct nf_conntrack *nfct)
2584 { 2602 {
2585 if (nfct) 2603 if (nfct)
2586 atomic_inc(&nfct->use); 2604 atomic_inc(&nfct->use);
2587 } 2605 }
2588 #endif 2606 #endif
2589 #ifdef CONFIG_BRIDGE_NETFILTER 2607 #ifdef CONFIG_BRIDGE_NETFILTER
2590 static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) 2608 static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
2591 { 2609 {
2592 if (nf_bridge && atomic_dec_and_test(&nf_bridge->use)) 2610 if (nf_bridge && atomic_dec_and_test(&nf_bridge->use))
2593 kfree(nf_bridge); 2611 kfree(nf_bridge);
2594 } 2612 }
2595 static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge) 2613 static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge)
2596 { 2614 {
2597 if (nf_bridge) 2615 if (nf_bridge)
2598 atomic_inc(&nf_bridge->use); 2616 atomic_inc(&nf_bridge->use);
2599 } 2617 }
2600 #endif /* CONFIG_BRIDGE_NETFILTER */ 2618 #endif /* CONFIG_BRIDGE_NETFILTER */
2601 static inline void nf_reset(struct sk_buff *skb) 2619 static inline void nf_reset(struct sk_buff *skb)
2602 { 2620 {
2603 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 2621 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
2604 nf_conntrack_put(skb->nfct); 2622 nf_conntrack_put(skb->nfct);
2605 skb->nfct = NULL; 2623 skb->nfct = NULL;
2606 #endif 2624 #endif
2607 #ifdef CONFIG_BRIDGE_NETFILTER 2625 #ifdef CONFIG_BRIDGE_NETFILTER
2608 nf_bridge_put(skb->nf_bridge); 2626 nf_bridge_put(skb->nf_bridge);
2609 skb->nf_bridge = NULL; 2627 skb->nf_bridge = NULL;
2610 #endif 2628 #endif
2611 } 2629 }
2612 2630
2613 static inline void nf_reset_trace(struct sk_buff *skb) 2631 static inline void nf_reset_trace(struct sk_buff *skb)
2614 { 2632 {
2615 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) 2633 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
2616 skb->nf_trace = 0; 2634 skb->nf_trace = 0;
2617 #endif 2635 #endif
2618 } 2636 }
2619 2637
2620 /* Note: This doesn't put any conntrack and bridge info in dst. */ 2638 /* Note: This doesn't put any conntrack and bridge info in dst. */
2621 static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) 2639 static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
2622 { 2640 {
2623 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 2641 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
2624 dst->nfct = src->nfct; 2642 dst->nfct = src->nfct;
2625 nf_conntrack_get(src->nfct); 2643 nf_conntrack_get(src->nfct);
2626 dst->nfctinfo = src->nfctinfo; 2644 dst->nfctinfo = src->nfctinfo;
2627 #endif 2645 #endif
2628 #ifdef CONFIG_BRIDGE_NETFILTER 2646 #ifdef CONFIG_BRIDGE_NETFILTER
2629 dst->nf_bridge = src->nf_bridge; 2647 dst->nf_bridge = src->nf_bridge;
2630 nf_bridge_get(src->nf_bridge); 2648 nf_bridge_get(src->nf_bridge);
2631 #endif 2649 #endif
2632 } 2650 }
2633 2651
2634 static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) 2652 static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
2635 { 2653 {
2636 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 2654 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
2637 nf_conntrack_put(dst->nfct); 2655 nf_conntrack_put(dst->nfct);
2638 #endif 2656 #endif
2639 #ifdef CONFIG_BRIDGE_NETFILTER 2657 #ifdef CONFIG_BRIDGE_NETFILTER
2640 nf_bridge_put(dst->nf_bridge); 2658 nf_bridge_put(dst->nf_bridge);
2641 #endif 2659 #endif
2642 __nf_copy(dst, src); 2660 __nf_copy(dst, src);
2643 } 2661 }
2644 2662
2645 #ifdef CONFIG_NETWORK_SECMARK 2663 #ifdef CONFIG_NETWORK_SECMARK
2646 static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from) 2664 static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from)
2647 { 2665 {
2648 to->secmark = from->secmark; 2666 to->secmark = from->secmark;
2649 } 2667 }
2650 2668
2651 static inline void skb_init_secmark(struct sk_buff *skb) 2669 static inline void skb_init_secmark(struct sk_buff *skb)
2652 { 2670 {
2653 skb->secmark = 0; 2671 skb->secmark = 0;
2654 } 2672 }
2655 #else 2673 #else
2656 static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from) 2674 static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from)
2657 { } 2675 { }
2658 2676
2659 static inline void skb_init_secmark(struct sk_buff *skb) 2677 static inline void skb_init_secmark(struct sk_buff *skb)
2660 { } 2678 { }
2661 #endif 2679 #endif
2662 2680
2663 static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping) 2681 static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping)
2664 { 2682 {
2665 skb->queue_mapping = queue_mapping; 2683 skb->queue_mapping = queue_mapping;
2666 } 2684 }
2667 2685
2668 static inline u16 skb_get_queue_mapping(const struct sk_buff *skb) 2686 static inline u16 skb_get_queue_mapping(const struct sk_buff *skb)
2669 { 2687 {
2670 return skb->queue_mapping; 2688 return skb->queue_mapping;
2671 } 2689 }
2672 2690
2673 static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from) 2691 static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from)
2674 { 2692 {
2675 to->queue_mapping = from->queue_mapping; 2693 to->queue_mapping = from->queue_mapping;
2676 } 2694 }
2677 2695
2678 static inline void skb_record_rx_queue(struct sk_buff *skb, u16 rx_queue) 2696 static inline void skb_record_rx_queue(struct sk_buff *skb, u16 rx_queue)
2679 { 2697 {
2680 skb->queue_mapping = rx_queue + 1; 2698 skb->queue_mapping = rx_queue + 1;
2681 } 2699 }
2682 2700
2683 static inline u16 skb_get_rx_queue(const struct sk_buff *skb) 2701 static inline u16 skb_get_rx_queue(const struct sk_buff *skb)
2684 { 2702 {
2685 return skb->queue_mapping - 1; 2703 return skb->queue_mapping - 1;
2686 } 2704 }
2687 2705
2688 static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) 2706 static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
2689 { 2707 {
2690 return skb->queue_mapping != 0; 2708 return skb->queue_mapping != 0;
2691 } 2709 }
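
Because skb_record_rx_queue() stores the queue index offset by one, a queue_mapping of zero doubles as "no RX queue recorded", which is exactly what skb_rx_queue_recorded() tests. A minimal driver-style sketch of that pattern follows; the ring structure and its queue_index field are hypothetical placeholders, not part of this header:

    #include <linux/skbuff.h>

    struct my_rx_ring {                     /* hypothetical driver ring */
            u16 queue_index;
    };

    static void my_driver_rx(struct my_rx_ring *ring, struct sk_buff *skb)
    {
            /* Stores queue_index + 1, so 0 keeps meaning "not recorded". */
            skb_record_rx_queue(skb, ring->queue_index);

            if (skb_rx_queue_recorded(skb))
                    pr_debug("rx on queue %u\n", skb_get_rx_queue(skb));
    }
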
2692 2710
2693 u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, 2711 u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
2694 unsigned int num_tx_queues); 2712 unsigned int num_tx_queues);
2695 2713
2696 static inline struct sec_path *skb_sec_path(struct sk_buff *skb) 2714 static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
2697 { 2715 {
2698 #ifdef CONFIG_XFRM 2716 #ifdef CONFIG_XFRM
2699 return skb->sp; 2717 return skb->sp;
2700 #else 2718 #else
2701 return NULL; 2719 return NULL;
2702 #endif 2720 #endif
2703 } 2721 }
2704 2722
2705 /* Keeps track of mac header offset relative to skb->head. 2723 /* Keeps track of mac header offset relative to skb->head.
2706 * It is useful for TSO of tunneling protocols, e.g. GRE. 2724 * It is useful for TSO of tunneling protocols, e.g. GRE.
2707 * For non-tunnel skb it points to skb_mac_header() and for 2725 * For non-tunnel skb it points to skb_mac_header() and for
2708 * tunnel skb it points to outer mac header. 2726 * tunnel skb it points to outer mac header.
2709 * Keeps track of level of encapsulation of network headers. 2727 * Keeps track of level of encapsulation of network headers.
2710 */ 2728 */
2711 struct skb_gso_cb { 2729 struct skb_gso_cb {
2712 int mac_offset; 2730 int mac_offset;
2713 int encap_level; 2731 int encap_level;
2714 }; 2732 };
2715 #define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb) 2733 #define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb)
2716 2734
2717 static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) 2735 static inline int skb_tnl_header_len(const struct sk_buff *inner_skb)
2718 { 2736 {
2719 return (skb_mac_header(inner_skb) - inner_skb->head) - 2737 return (skb_mac_header(inner_skb) - inner_skb->head) -
2720 SKB_GSO_CB(inner_skb)->mac_offset; 2738 SKB_GSO_CB(inner_skb)->mac_offset;
2721 } 2739 }
2722 2740
2723 static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) 2741 static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra)
2724 { 2742 {
2725 int new_headroom, headroom; 2743 int new_headroom, headroom;
2726 int ret; 2744 int ret;
2727 2745
2728 headroom = skb_headroom(skb); 2746 headroom = skb_headroom(skb);
2729 ret = pskb_expand_head(skb, extra, 0, GFP_ATOMIC); 2747 ret = pskb_expand_head(skb, extra, 0, GFP_ATOMIC);
2730 if (ret) 2748 if (ret)
2731 return ret; 2749 return ret;
2732 2750
2733 new_headroom = skb_headroom(skb); 2751 new_headroom = skb_headroom(skb);
2734 SKB_GSO_CB(skb)->mac_offset += (new_headroom - headroom); 2752 SKB_GSO_CB(skb)->mac_offset += (new_headroom - headroom);
2735 return 0; 2753 return 0;
2736 } 2754 }
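
gso_pskb_expand_head() exists so that GSO and tunnel code can grow headroom without losing track of the outer MAC header: any shift of skb->head is folded back into SKB_GSO_CB(skb)->mac_offset, so a later skb_tnl_header_len() still returns the correct outer-header length. A hedged sketch of how a segmentation path might use it; the helper name and the "needed" amount are assumptions, not code from this file:

    static int my_ensure_gso_headroom(struct sk_buff *skb, unsigned int needed)
    {
            /* Grow headroom while keeping SKB_GSO_CB(skb)->mac_offset
             * consistent with the (possibly relocated) skb->head.
             */
            if (skb_headroom(skb) < needed)
                    return gso_pskb_expand_head(skb, needed - skb_headroom(skb));

            return 0;
    }
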
2737 2755
2738 static inline bool skb_is_gso(const struct sk_buff *skb) 2756 static inline bool skb_is_gso(const struct sk_buff *skb)
2739 { 2757 {
2740 return skb_shinfo(skb)->gso_size; 2758 return skb_shinfo(skb)->gso_size;
2741 } 2759 }
2742 2760
2743 /* Note: Should be called only if skb_is_gso(skb) is true */ 2761 /* Note: Should be called only if skb_is_gso(skb) is true */
2744 static inline bool skb_is_gso_v6(const struct sk_buff *skb) 2762 static inline bool skb_is_gso_v6(const struct sk_buff *skb)
2745 { 2763 {
2746 return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; 2764 return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
2747 } 2765 }
2748 2766
2749 void __skb_warn_lro_forwarding(const struct sk_buff *skb); 2767 void __skb_warn_lro_forwarding(const struct sk_buff *skb);
2750 2768
2751 static inline bool skb_warn_if_lro(const struct sk_buff *skb) 2769 static inline bool skb_warn_if_lro(const struct sk_buff *skb)
2752 { 2770 {
2753 /* LRO sets gso_size but not gso_type, whereas if GSO is really 2771 /* LRO sets gso_size but not gso_type, whereas if GSO is really
2754 * wanted then gso_type will be set. */ 2772 * wanted then gso_type will be set. */
2755 const struct skb_shared_info *shinfo = skb_shinfo(skb); 2773 const struct skb_shared_info *shinfo = skb_shinfo(skb);
2756 2774
2757 if (skb_is_nonlinear(skb) && shinfo->gso_size != 0 && 2775 if (skb_is_nonlinear(skb) && shinfo->gso_size != 0 &&
2758 unlikely(shinfo->gso_type == 0)) { 2776 unlikely(shinfo->gso_type == 0)) {
2759 __skb_warn_lro_forwarding(skb); 2777 __skb_warn_lro_forwarding(skb);
2760 return true; 2778 return true;
2761 } 2779 }
2762 return false; 2780 return false;
2763 } 2781 }
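
skb_warn_if_lro() is meant for forwarding paths: an LRO-merged skb (gso_size set but gso_type clear) must not be forwarded, since it may exceed the output device's MTU and cannot be re-segmented. A minimal sketch of the usual call pattern, assuming a hypothetical forwarding function and the usual <linux/netdevice.h> return codes:

    static int my_forward(struct sk_buff *skb)
    {
            if (skb_warn_if_lro(skb))
                    goto drop;              /* warn once and refuse to forward */

            /* ... normal forwarding continues here ... */
            return 0;

    drop:
            kfree_skb(skb);
            return NET_RX_DROP;
    }
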
2764 2782
2765 static inline void skb_forward_csum(struct sk_buff *skb) 2783 static inline void skb_forward_csum(struct sk_buff *skb)
2766 { 2784 {
2767 /* Unfortunately we don't support this one. Any brave souls? */ 2785 /* Unfortunately we don't support this one. Any brave souls? */
2768 if (skb->ip_summed == CHECKSUM_COMPLETE) 2786 if (skb->ip_summed == CHECKSUM_COMPLETE)
2769 skb->ip_summed = CHECKSUM_NONE; 2787 skb->ip_summed = CHECKSUM_NONE;
2770 } 2788 }
2771 2789
2772 /** 2790 /**
2773 * skb_checksum_none_assert - make sure skb ip_summed is CHECKSUM_NONE 2791 * skb_checksum_none_assert - make sure skb ip_summed is CHECKSUM_NONE
2774 * @skb: skb to check 2792 * @skb: skb to check
2775 * 2793 *
2776 * fresh skbs have their ip_summed set to CHECKSUM_NONE. 2794 * fresh skbs have their ip_summed set to CHECKSUM_NONE.
2777 * Instead of forcing ip_summed to CHECKSUM_NONE, we can 2795 * Instead of forcing ip_summed to CHECKSUM_NONE, we can
2778 * use this helper, to document places where we make this assertion. 2796 * use this helper, to document places where we make this assertion.
2779 */ 2797 */
2780 static inline void skb_checksum_none_assert(const struct sk_buff *skb) 2798 static inline void skb_checksum_none_assert(const struct sk_buff *skb)
2781 { 2799 {
2782 #ifdef DEBUG 2800 #ifdef DEBUG
2783 BUG_ON(skb->ip_summed != CHECKSUM_NONE); 2801 BUG_ON(skb->ip_summed != CHECKSUM_NONE);
2784 #endif 2802 #endif
2785 } 2803 }
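
In practice this assertion appears in driver receive paths right before the driver decides whether to upgrade ip_summed based on hardware checksum status. A hedged driver-style sketch; the hw_csum_ok flag stands in for whatever the real descriptor reports:

    static void my_driver_rx_csum(struct sk_buff *skb, bool hw_csum_ok)
    {
            /* Freshly allocated skbs start out as CHECKSUM_NONE. */
            skb_checksum_none_assert(skb);

            if (hw_csum_ok)
                    skb->ip_summed = CHECKSUM_UNNECESSARY;
    }
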
2786 2804
2787 bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); 2805 bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
2788 2806
2789 u32 __skb_get_poff(const struct sk_buff *skb); 2807 u32 __skb_get_poff(const struct sk_buff *skb);
2790 2808
2791 /** 2809 /**
2792 * skb_head_is_locked - Determine if the skb->head is locked down 2810 * skb_head_is_locked - Determine if the skb->head is locked down
2793 * @skb: skb to check 2811 * @skb: skb to check
2794 * 2812 *
2795 * The head on skbs built around a head frag can be removed if they are 2813 * The head on skbs built around a head frag can be removed if they are
2796 * not cloned. This function returns true if the skb head is locked down 2814 * not cloned. This function returns true if the skb head is locked down
2797 * due to either being allocated via kmalloc, or by being a clone with 2815 * due to either being allocated via kmalloc, or by being a clone with
2798 * multiple references to the head. 2816 * multiple references to the head.
2799 */ 2817 */
2800 static inline bool skb_head_is_locked(const struct sk_buff *skb) 2818 static inline bool skb_head_is_locked(const struct sk_buff *skb)
2801 { 2819 {
2802 return !skb->head_frag || skb_cloned(skb); 2820 return !skb->head_frag || skb_cloned(skb);
2803 } 2821 }
2804 #endif /* __KERNEL__ */ 2822 #endif /* __KERNEL__ */
2805 #endif /* _LINUX_SKBUFF_H */ 2823 #endif /* _LINUX_SKBUFF_H */
2806 2824
1 /* 1 /*
2 * NET3 Protocol independent device support routines. 2 * NET3 Protocol independent device support routines.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License 5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version. 7 * 2 of the License, or (at your option) any later version.
8 * 8 *
9 * Derived from the non IP parts of dev.c 1.0.19 9 * Derived from the non IP parts of dev.c 1.0.19
10 * Authors: Ross Biro 10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk> 12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 * 13 *
14 * Additional Authors: 14 * Additional Authors:
15 * Florian la Roche <rzsfl@rz.uni-sb.de> 15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org> 16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dahinds@users.sourceforge.net> 17 * David Hinds <dahinds@users.sourceforge.net>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu> 19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi> 20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
21 * 21 *
22 * Changes: 22 * Changes:
23 * D.J. Barrow : Fixed bug where dev->refcnt gets set 23 * D.J. Barrow : Fixed bug where dev->refcnt gets set
24 * to 2 if register_netdev gets called 24 * to 2 if register_netdev gets called
25 * before net_dev_init & also removed a 25 * before net_dev_init & also removed a
26 * few lines of code in the process. 26 * few lines of code in the process.
27 * Alan Cox : device private ioctl copies fields back. 27 * Alan Cox : device private ioctl copies fields back.
28 * Alan Cox : Transmit queue code does relevant 28 * Alan Cox : Transmit queue code does relevant
29 * stunts to keep the queue safe. 29 * stunts to keep the queue safe.
30 * Alan Cox : Fixed double lock. 30 * Alan Cox : Fixed double lock.
31 * Alan Cox : Fixed promisc NULL pointer trap 31 * Alan Cox : Fixed promisc NULL pointer trap
32 * ???????? : Support the full private ioctl range 32 * ???????? : Support the full private ioctl range
33 * Alan Cox : Moved ioctl permission check into 33 * Alan Cox : Moved ioctl permission check into
34 * drivers 34 * drivers
35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI 35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
36 * Alan Cox : 100 backlog just doesn't cut it when 36 * Alan Cox : 100 backlog just doesn't cut it when
37 * you start doing multicast video 8) 37 * you start doing multicast video 8)
38 * Alan Cox : Rewrote net_bh and list manager. 38 * Alan Cox : Rewrote net_bh and list manager.
39 * Alan Cox : Fix ETH_P_ALL echoback lengths. 39 * Alan Cox : Fix ETH_P_ALL echoback lengths.
40 * Alan Cox : Took out transmit every packet pass 40 * Alan Cox : Took out transmit every packet pass
41 * Saved a few bytes in the ioctl handler 41 * Saved a few bytes in the ioctl handler
42 * Alan Cox : Network driver sets packet type before 42 * Alan Cox : Network driver sets packet type before
43 * calling netif_rx. Saves a function 43 * calling netif_rx. Saves a function
44 * call a packet. 44 * call a packet.
45 * Alan Cox : Hashed net_bh() 45 * Alan Cox : Hashed net_bh()
46 * Richard Kooijman: Timestamp fixes. 46 * Richard Kooijman: Timestamp fixes.
47 * Alan Cox : Wrong field in SIOCGIFDSTADDR 47 * Alan Cox : Wrong field in SIOCGIFDSTADDR
48 * Alan Cox : Device lock protection. 48 * Alan Cox : Device lock protection.
49 * Alan Cox : Fixed nasty side effect of device close 49 * Alan Cox : Fixed nasty side effect of device close
50 * changes. 50 * changes.
51 * Rudi Cilibrasi : Pass the right thing to 51 * Rudi Cilibrasi : Pass the right thing to
52 * set_mac_address() 52 * set_mac_address()
53 * Dave Miller : 32bit quantity for the device lock to 53 * Dave Miller : 32bit quantity for the device lock to
54 * make it work out on a Sparc. 54 * make it work out on a Sparc.
55 * Bjorn Ekwall : Added KERNELD hack. 55 * Bjorn Ekwall : Added KERNELD hack.
56 * Alan Cox : Cleaned up the backlog initialise. 56 * Alan Cox : Cleaned up the backlog initialise.
57 * Craig Metz : SIOCGIFCONF fix if space for under 57 * Craig Metz : SIOCGIFCONF fix if space for under
58 * 1 device. 58 * 1 device.
59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there 59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
60 * is no device open function. 60 * is no device open function.
61 * Andi Kleen : Fix error reporting for SIOCGIFCONF 61 * Andi Kleen : Fix error reporting for SIOCGIFCONF
62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF 62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
63 * Cyrus Durgin : Cleaned for KMOD 63 * Cyrus Durgin : Cleaned for KMOD
64 * Adam Sulmicki : Bug Fix : Network Device Unload 64 * Adam Sulmicki : Bug Fix : Network Device Unload
65 * A network device unload needs to purge 65 * A network device unload needs to purge
66 * the backlog queue. 66 * the backlog queue.
67 * Paul Rusty Russell : SIOCSIFNAME 67 * Paul Rusty Russell : SIOCSIFNAME
68 * Pekka Riikonen : Netdev boot-time settings code 68 * Pekka Riikonen : Netdev boot-time settings code
69 * Andrew Morton : Make unregister_netdevice wait 69 * Andrew Morton : Make unregister_netdevice wait
70 * indefinitely on dev->refcnt 70 * indefinitely on dev->refcnt
71 * J Hadi Salim : - Backlog queue sampling 71 * J Hadi Salim : - Backlog queue sampling
72 * - netif_rx() feedback 72 * - netif_rx() feedback
73 */ 73 */
74 74
75 #include <asm/uaccess.h> 75 #include <asm/uaccess.h>
76 #include <linux/bitops.h> 76 #include <linux/bitops.h>
77 #include <linux/capability.h> 77 #include <linux/capability.h>
78 #include <linux/cpu.h> 78 #include <linux/cpu.h>
79 #include <linux/types.h> 79 #include <linux/types.h>
80 #include <linux/kernel.h> 80 #include <linux/kernel.h>
81 #include <linux/hash.h> 81 #include <linux/hash.h>
82 #include <linux/slab.h> 82 #include <linux/slab.h>
83 #include <linux/sched.h> 83 #include <linux/sched.h>
84 #include <linux/mutex.h> 84 #include <linux/mutex.h>
85 #include <linux/string.h> 85 #include <linux/string.h>
86 #include <linux/mm.h> 86 #include <linux/mm.h>
87 #include <linux/socket.h> 87 #include <linux/socket.h>
88 #include <linux/sockios.h> 88 #include <linux/sockios.h>
89 #include <linux/errno.h> 89 #include <linux/errno.h>
90 #include <linux/interrupt.h> 90 #include <linux/interrupt.h>
91 #include <linux/if_ether.h> 91 #include <linux/if_ether.h>
92 #include <linux/netdevice.h> 92 #include <linux/netdevice.h>
93 #include <linux/etherdevice.h> 93 #include <linux/etherdevice.h>
94 #include <linux/ethtool.h> 94 #include <linux/ethtool.h>
95 #include <linux/notifier.h> 95 #include <linux/notifier.h>
96 #include <linux/skbuff.h> 96 #include <linux/skbuff.h>
97 #include <net/net_namespace.h> 97 #include <net/net_namespace.h>
98 #include <net/sock.h> 98 #include <net/sock.h>
99 #include <linux/rtnetlink.h> 99 #include <linux/rtnetlink.h>
100 #include <linux/stat.h> 100 #include <linux/stat.h>
101 #include <net/dst.h> 101 #include <net/dst.h>
102 #include <net/pkt_sched.h> 102 #include <net/pkt_sched.h>
103 #include <net/checksum.h> 103 #include <net/checksum.h>
104 #include <net/xfrm.h> 104 #include <net/xfrm.h>
105 #include <linux/highmem.h> 105 #include <linux/highmem.h>
106 #include <linux/init.h> 106 #include <linux/init.h>
107 #include <linux/module.h> 107 #include <linux/module.h>
108 #include <linux/netpoll.h> 108 #include <linux/netpoll.h>
109 #include <linux/rcupdate.h> 109 #include <linux/rcupdate.h>
110 #include <linux/delay.h> 110 #include <linux/delay.h>
111 #include <net/iw_handler.h> 111 #include <net/iw_handler.h>
112 #include <asm/current.h> 112 #include <asm/current.h>
113 #include <linux/audit.h> 113 #include <linux/audit.h>
114 #include <linux/dmaengine.h> 114 #include <linux/dmaengine.h>
115 #include <linux/err.h> 115 #include <linux/err.h>
116 #include <linux/ctype.h> 116 #include <linux/ctype.h>
117 #include <linux/if_arp.h> 117 #include <linux/if_arp.h>
118 #include <linux/if_vlan.h> 118 #include <linux/if_vlan.h>
119 #include <linux/ip.h> 119 #include <linux/ip.h>
120 #include <net/ip.h> 120 #include <net/ip.h>
121 #include <linux/ipv6.h> 121 #include <linux/ipv6.h>
122 #include <linux/in.h> 122 #include <linux/in.h>
123 #include <linux/jhash.h> 123 #include <linux/jhash.h>
124 #include <linux/random.h> 124 #include <linux/random.h>
125 #include <trace/events/napi.h> 125 #include <trace/events/napi.h>
126 #include <trace/events/net.h> 126 #include <trace/events/net.h>
127 #include <trace/events/skb.h> 127 #include <trace/events/skb.h>
128 #include <linux/pci.h> 128 #include <linux/pci.h>
129 #include <linux/inetdevice.h> 129 #include <linux/inetdevice.h>
130 #include <linux/cpu_rmap.h> 130 #include <linux/cpu_rmap.h>
131 #include <linux/static_key.h> 131 #include <linux/static_key.h>
132 #include <linux/hashtable.h> 132 #include <linux/hashtable.h>
133 #include <linux/vmalloc.h> 133 #include <linux/vmalloc.h>
134 #include <linux/if_macvlan.h> 134 #include <linux/if_macvlan.h>
135 135
136 #include "net-sysfs.h" 136 #include "net-sysfs.h"
137 137
138 /* Instead of increasing this, you should create a hash table. */ 138 /* Instead of increasing this, you should create a hash table. */
139 #define MAX_GRO_SKBS 8 139 #define MAX_GRO_SKBS 8
140 140
141 /* This should be increased if a protocol with a bigger head is added. */ 141 /* This should be increased if a protocol with a bigger head is added. */
142 #define GRO_MAX_HEAD (MAX_HEADER + 128) 142 #define GRO_MAX_HEAD (MAX_HEADER + 128)
143 143
144 static DEFINE_SPINLOCK(ptype_lock); 144 static DEFINE_SPINLOCK(ptype_lock);
145 static DEFINE_SPINLOCK(offload_lock); 145 static DEFINE_SPINLOCK(offload_lock);
146 struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; 146 struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
147 struct list_head ptype_all __read_mostly; /* Taps */ 147 struct list_head ptype_all __read_mostly; /* Taps */
148 static struct list_head offload_base __read_mostly; 148 static struct list_head offload_base __read_mostly;
149 149
150 /* 150 /*
151 * The @dev_base_head list is protected by @dev_base_lock and the rtnl 151 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
152 * semaphore. 152 * semaphore.
153 * 153 *
154 * Pure readers hold dev_base_lock for reading, or rcu_read_lock() 154 * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
155 * 155 *
156 * Writers must hold the rtnl semaphore while they loop through the 156 * Writers must hold the rtnl semaphore while they loop through the
157 * dev_base_head list, and hold dev_base_lock for writing when they do the 157 * dev_base_head list, and hold dev_base_lock for writing when they do the
158 * actual updates. This allows pure readers to access the list even 158 * actual updates. This allows pure readers to access the list even
159 * while a writer is preparing to update it. 159 * while a writer is preparing to update it.
160 * 160 *
161 * To put it another way, dev_base_lock is held for writing only to 161 * To put it another way, dev_base_lock is held for writing only to
162 * protect against pure readers; the rtnl semaphore provides the 162 * protect against pure readers; the rtnl semaphore provides the
163 * protection against other writers. 163 * protection against other writers.
164 * 164 *
165 * See, for example usages, register_netdevice() and 165 * See, for example usages, register_netdevice() and
166 * unregister_netdevice(), which must be called with the rtnl 166 * unregister_netdevice(), which must be called with the rtnl
167 * semaphore held. 167 * semaphore held.
168 */ 168 */
169 DEFINE_RWLOCK(dev_base_lock); 169 DEFINE_RWLOCK(dev_base_lock);
170 EXPORT_SYMBOL(dev_base_lock); 170 EXPORT_SYMBOL(dev_base_lock);
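
Per the locking rules above, a pure reader may walk the device list under either read_lock(&dev_base_lock) or rcu_read_lock(); only writers need the RTNL plus write_lock_bh(&dev_base_lock). A minimal RCU-side sketch (the iteration macro comes from <linux/netdevice.h>); treat it as an illustration of the rule rather than code from this file:

    static void my_count_devices(struct net *net)
    {
            struct net_device *dev;
            unsigned int n = 0;

            rcu_read_lock();
            for_each_netdev_rcu(net, dev)   /* pure reader: no dev_base_lock */
                    n++;
            rcu_read_unlock();

            pr_info("%u devices in this netns\n", n);
    }
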
171 171
172 /* protects napi_hash addition/deletion and napi_gen_id */ 172 /* protects napi_hash addition/deletion and napi_gen_id */
173 static DEFINE_SPINLOCK(napi_hash_lock); 173 static DEFINE_SPINLOCK(napi_hash_lock);
174 174
175 static unsigned int napi_gen_id; 175 static unsigned int napi_gen_id;
176 static DEFINE_HASHTABLE(napi_hash, 8); 176 static DEFINE_HASHTABLE(napi_hash, 8);
177 177
178 static seqcount_t devnet_rename_seq; 178 static seqcount_t devnet_rename_seq;
179 179
180 static inline void dev_base_seq_inc(struct net *net) 180 static inline void dev_base_seq_inc(struct net *net)
181 { 181 {
182 while (++net->dev_base_seq == 0); 182 while (++net->dev_base_seq == 0);
183 } 183 }
184 184
185 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) 185 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
186 { 186 {
187 unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); 187 unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
188 188
189 return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)]; 189 return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
190 } 190 }
191 191
192 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) 192 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
193 { 193 {
194 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; 194 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
195 } 195 }
196 196
197 static inline void rps_lock(struct softnet_data *sd) 197 static inline void rps_lock(struct softnet_data *sd)
198 { 198 {
199 #ifdef CONFIG_RPS 199 #ifdef CONFIG_RPS
200 spin_lock(&sd->input_pkt_queue.lock); 200 spin_lock(&sd->input_pkt_queue.lock);
201 #endif 201 #endif
202 } 202 }
203 203
204 static inline void rps_unlock(struct softnet_data *sd) 204 static inline void rps_unlock(struct softnet_data *sd)
205 { 205 {
206 #ifdef CONFIG_RPS 206 #ifdef CONFIG_RPS
207 spin_unlock(&sd->input_pkt_queue.lock); 207 spin_unlock(&sd->input_pkt_queue.lock);
208 #endif 208 #endif
209 } 209 }
210 210
211 /* Device list insertion */ 211 /* Device list insertion */
212 static void list_netdevice(struct net_device *dev) 212 static void list_netdevice(struct net_device *dev)
213 { 213 {
214 struct net *net = dev_net(dev); 214 struct net *net = dev_net(dev);
215 215
216 ASSERT_RTNL(); 216 ASSERT_RTNL();
217 217
218 write_lock_bh(&dev_base_lock); 218 write_lock_bh(&dev_base_lock);
219 list_add_tail_rcu(&dev->dev_list, &net->dev_base_head); 219 list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
220 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); 220 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
221 hlist_add_head_rcu(&dev->index_hlist, 221 hlist_add_head_rcu(&dev->index_hlist,
222 dev_index_hash(net, dev->ifindex)); 222 dev_index_hash(net, dev->ifindex));
223 write_unlock_bh(&dev_base_lock); 223 write_unlock_bh(&dev_base_lock);
224 224
225 dev_base_seq_inc(net); 225 dev_base_seq_inc(net);
226 } 226 }
227 227
228 /* Device list removal 228 /* Device list removal
229 * caller must respect a RCU grace period before freeing/reusing dev 229 * caller must respect a RCU grace period before freeing/reusing dev
230 */ 230 */
231 static void unlist_netdevice(struct net_device *dev) 231 static void unlist_netdevice(struct net_device *dev)
232 { 232 {
233 ASSERT_RTNL(); 233 ASSERT_RTNL();
234 234
235 /* Unlink dev from the device chain */ 235 /* Unlink dev from the device chain */
236 write_lock_bh(&dev_base_lock); 236 write_lock_bh(&dev_base_lock);
237 list_del_rcu(&dev->dev_list); 237 list_del_rcu(&dev->dev_list);
238 hlist_del_rcu(&dev->name_hlist); 238 hlist_del_rcu(&dev->name_hlist);
239 hlist_del_rcu(&dev->index_hlist); 239 hlist_del_rcu(&dev->index_hlist);
240 write_unlock_bh(&dev_base_lock); 240 write_unlock_bh(&dev_base_lock);
241 241
242 dev_base_seq_inc(dev_net(dev)); 242 dev_base_seq_inc(dev_net(dev));
243 } 243 }
244 244
245 /* 245 /*
246 * Our notifier list 246 * Our notifier list
247 */ 247 */
248 248
249 static RAW_NOTIFIER_HEAD(netdev_chain); 249 static RAW_NOTIFIER_HEAD(netdev_chain);
250 250
251 /* 251 /*
252 * Device drivers call our routines to queue packets here. We empty the 252 * Device drivers call our routines to queue packets here. We empty the
253 * queue in the local softnet handler. 253 * queue in the local softnet handler.
254 */ 254 */
255 255
256 DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); 256 DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
257 EXPORT_PER_CPU_SYMBOL(softnet_data); 257 EXPORT_PER_CPU_SYMBOL(softnet_data);
258 258
259 #ifdef CONFIG_LOCKDEP 259 #ifdef CONFIG_LOCKDEP
260 /* 260 /*
261 * register_netdevice() inits txq->_xmit_lock and sets lockdep class 261 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
262 * according to dev->type 262 * according to dev->type
263 */ 263 */
264 static const unsigned short netdev_lock_type[] = 264 static const unsigned short netdev_lock_type[] =
265 {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25, 265 {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
266 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET, 266 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
267 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM, 267 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
268 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP, 268 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
269 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD, 269 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
270 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25, 270 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
271 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP, 271 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
272 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD, 272 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
273 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, 273 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
274 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, 274 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
275 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, 275 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
276 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, 276 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
277 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, 277 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
278 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE, 278 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
279 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE}; 279 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
280 280
281 static const char *const netdev_lock_name[] = 281 static const char *const netdev_lock_name[] =
282 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", 282 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
283 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", 283 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
284 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", 284 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
285 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP", 285 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
286 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD", 286 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
287 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25", 287 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
288 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP", 288 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
289 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD", 289 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
290 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", 290 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
291 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", 291 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
292 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", 292 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
293 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", 293 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
294 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", 294 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
295 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE", 295 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
296 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; 296 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
297 297
298 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; 298 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
299 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; 299 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
300 300
301 static inline unsigned short netdev_lock_pos(unsigned short dev_type) 301 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
302 { 302 {
303 int i; 303 int i;
304 304
305 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++) 305 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
306 if (netdev_lock_type[i] == dev_type) 306 if (netdev_lock_type[i] == dev_type)
307 return i; 307 return i;
308 /* the last key is used by default */ 308 /* the last key is used by default */
309 return ARRAY_SIZE(netdev_lock_type) - 1; 309 return ARRAY_SIZE(netdev_lock_type) - 1;
310 } 310 }
311 311
312 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, 312 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
313 unsigned short dev_type) 313 unsigned short dev_type)
314 { 314 {
315 int i; 315 int i;
316 316
317 i = netdev_lock_pos(dev_type); 317 i = netdev_lock_pos(dev_type);
318 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], 318 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
319 netdev_lock_name[i]); 319 netdev_lock_name[i]);
320 } 320 }
321 321
322 static inline void netdev_set_addr_lockdep_class(struct net_device *dev) 322 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
323 { 323 {
324 int i; 324 int i;
325 325
326 i = netdev_lock_pos(dev->type); 326 i = netdev_lock_pos(dev->type);
327 lockdep_set_class_and_name(&dev->addr_list_lock, 327 lockdep_set_class_and_name(&dev->addr_list_lock,
328 &netdev_addr_lock_key[i], 328 &netdev_addr_lock_key[i],
329 netdev_lock_name[i]); 329 netdev_lock_name[i]);
330 } 330 }
331 #else 331 #else
332 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, 332 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
333 unsigned short dev_type) 333 unsigned short dev_type)
334 { 334 {
335 } 335 }
336 static inline void netdev_set_addr_lockdep_class(struct net_device *dev) 336 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
337 { 337 {
338 } 338 }
339 #endif 339 #endif
340 340
341 /******************************************************************************* 341 /*******************************************************************************
342 342
343 Protocol management and registration routines 343 Protocol management and registration routines
344 344
345 *******************************************************************************/ 345 *******************************************************************************/
346 346
347 /* 347 /*
348 * Add a protocol ID to the list. Now that the input handler is 348 * Add a protocol ID to the list. Now that the input handler is
349 * smarter we can dispense with all the messy stuff that used to be 349 * smarter we can dispense with all the messy stuff that used to be
350 * here. 350 * here.
351 * 351 *
352 * BEWARE!!! Protocol handlers, mangling input packets, 352 * BEWARE!!! Protocol handlers, mangling input packets,
353 * MUST BE last in hash buckets and checking protocol handlers 353 * MUST BE last in hash buckets and checking protocol handlers
354 * MUST start from promiscuous ptype_all chain in net_bh. 354 * MUST start from promiscuous ptype_all chain in net_bh.
355 * It is true now, do not change it. 355 * It is true now, do not change it.
356 * Explanation follows: if protocol handler, mangling packet, will 356 * Explanation follows: if protocol handler, mangling packet, will
357 * be the first on list, it is not able to sense, that packet 357 * be the first on list, it is not able to sense, that packet
358 * is cloned and should be copied-on-write, so that it will 358 * is cloned and should be copied-on-write, so that it will
359 * change it and subsequent readers will get broken packet. 359 * change it and subsequent readers will get broken packet.
360 * --ANK (980803) 360 * --ANK (980803)
361 */ 361 */
362 362
363 static inline struct list_head *ptype_head(const struct packet_type *pt) 363 static inline struct list_head *ptype_head(const struct packet_type *pt)
364 { 364 {
365 if (pt->type == htons(ETH_P_ALL)) 365 if (pt->type == htons(ETH_P_ALL))
366 return &ptype_all; 366 return &ptype_all;
367 else 367 else
368 return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; 368 return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
369 } 369 }
370 370
371 /** 371 /**
372 * dev_add_pack - add packet handler 372 * dev_add_pack - add packet handler
373 * @pt: packet type declaration 373 * @pt: packet type declaration
374 * 374 *
375 * Add a protocol handler to the networking stack. The passed &packet_type 375 * Add a protocol handler to the networking stack. The passed &packet_type
376 * is linked into kernel lists and may not be freed until it has been 376 * is linked into kernel lists and may not be freed until it has been
377 * removed from the kernel lists. 377 * removed from the kernel lists.
378 * 378 *
379 * This call does not sleep, therefore it cannot 379 * This call does not sleep, therefore it cannot
380 * guarantee that all CPUs in the middle of receiving packets 380 * guarantee that all CPUs in the middle of receiving packets
381 * will see the new packet type (until the next received packet). 381 * will see the new packet type (until the next received packet).
382 */ 382 */
383 383
384 void dev_add_pack(struct packet_type *pt) 384 void dev_add_pack(struct packet_type *pt)
385 { 385 {
386 struct list_head *head = ptype_head(pt); 386 struct list_head *head = ptype_head(pt);
387 387
388 spin_lock(&ptype_lock); 388 spin_lock(&ptype_lock);
389 list_add_rcu(&pt->list, head); 389 list_add_rcu(&pt->list, head);
390 spin_unlock(&ptype_lock); 390 spin_unlock(&ptype_lock);
391 } 391 }
392 EXPORT_SYMBOL(dev_add_pack); 392 EXPORT_SYMBOL(dev_add_pack);
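
A handler registered this way ends up on ptype_all (for ETH_P_ALL taps) or in the ptype_base hash, exactly as ptype_head() decides above. The following is a hedged sketch of a module-style tap with hypothetical names; each tap is handed its own reference on the skb and must consume it:

    static int my_tap_rcv(struct sk_buff *skb, struct net_device *dev,
                          struct packet_type *pt, struct net_device *orig_dev)
    {
            pr_debug("tap: %u bytes on %s\n", skb->len, dev->name);
            kfree_skb(skb);         /* consume the reference given to the tap */
            return 0;
    }

    static struct packet_type my_tap __read_mostly = {
            .type   = htons(ETH_P_ALL),     /* see ptype_head(): goes on ptype_all */
            .func   = my_tap_rcv,
    };

    /* dev_add_pack(&my_tap) at module init, dev_remove_pack(&my_tap) at exit. */
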
393 393
394 /** 394 /**
395 * __dev_remove_pack - remove packet handler 395 * __dev_remove_pack - remove packet handler
396 * @pt: packet type declaration 396 * @pt: packet type declaration
397 * 397 *
398 * Remove a protocol handler that was previously added to the kernel 398 * Remove a protocol handler that was previously added to the kernel
399 * protocol handlers by dev_add_pack(). The passed &packet_type is removed 399 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
400 * from the kernel lists and can be freed or reused once this function 400 * from the kernel lists and can be freed or reused once this function
401 * returns. 401 * returns.
402 * 402 *
403 * The packet type might still be in use by receivers 403 * The packet type might still be in use by receivers
404 * and must not be freed until after all the CPU's have gone 404 * and must not be freed until after all the CPU's have gone
405 * through a quiescent state. 405 * through a quiescent state.
406 */ 406 */
407 void __dev_remove_pack(struct packet_type *pt) 407 void __dev_remove_pack(struct packet_type *pt)
408 { 408 {
409 struct list_head *head = ptype_head(pt); 409 struct list_head *head = ptype_head(pt);
410 struct packet_type *pt1; 410 struct packet_type *pt1;
411 411
412 spin_lock(&ptype_lock); 412 spin_lock(&ptype_lock);
413 413
414 list_for_each_entry(pt1, head, list) { 414 list_for_each_entry(pt1, head, list) {
415 if (pt == pt1) { 415 if (pt == pt1) {
416 list_del_rcu(&pt->list); 416 list_del_rcu(&pt->list);
417 goto out; 417 goto out;
418 } 418 }
419 } 419 }
420 420
421 pr_warn("dev_remove_pack: %p not found\n", pt); 421 pr_warn("dev_remove_pack: %p not found\n", pt);
422 out: 422 out:
423 spin_unlock(&ptype_lock); 423 spin_unlock(&ptype_lock);
424 } 424 }
425 EXPORT_SYMBOL(__dev_remove_pack); 425 EXPORT_SYMBOL(__dev_remove_pack);
426 426
427 /** 427 /**
428 * dev_remove_pack - remove packet handler 428 * dev_remove_pack - remove packet handler
429 * @pt: packet type declaration 429 * @pt: packet type declaration
430 * 430 *
431 * Remove a protocol handler that was previously added to the kernel 431 * Remove a protocol handler that was previously added to the kernel
432 * protocol handlers by dev_add_pack(). The passed &packet_type is removed 432 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
433 * from the kernel lists and can be freed or reused once this function 433 * from the kernel lists and can be freed or reused once this function
434 * returns. 434 * returns.
435 * 435 *
436 * This call sleeps to guarantee that no CPU is looking at the packet 436 * This call sleeps to guarantee that no CPU is looking at the packet
437 * type after return. 437 * type after return.
438 */ 438 */
439 void dev_remove_pack(struct packet_type *pt) 439 void dev_remove_pack(struct packet_type *pt)
440 { 440 {
441 __dev_remove_pack(pt); 441 __dev_remove_pack(pt);
442 442
443 synchronize_net(); 443 synchronize_net();
444 } 444 }
445 EXPORT_SYMBOL(dev_remove_pack); 445 EXPORT_SYMBOL(dev_remove_pack);
446 446
447 447
448 /** 448 /**
449 * dev_add_offload - register offload handlers 449 * dev_add_offload - register offload handlers
450 * @po: protocol offload declaration 450 * @po: protocol offload declaration
451 * 451 *
452 * Add protocol offload handlers to the networking stack. The passed 452 * Add protocol offload handlers to the networking stack. The passed
453 * &proto_offload is linked into kernel lists and may not be freed until 453 * &proto_offload is linked into kernel lists and may not be freed until
454 * it has been removed from the kernel lists. 454 * it has been removed from the kernel lists.
455 * 455 *
456 * This call does not sleep, therefore it cannot 456 * This call does not sleep, therefore it cannot
457 * guarantee that all CPUs in the middle of receiving packets 457 * guarantee that all CPUs in the middle of receiving packets
458 * will see the new offload handlers (until the next received packet). 458 * will see the new offload handlers (until the next received packet).
459 */ 459 */
460 void dev_add_offload(struct packet_offload *po) 460 void dev_add_offload(struct packet_offload *po)
461 { 461 {
462 struct list_head *head = &offload_base; 462 struct list_head *head = &offload_base;
463 463
464 spin_lock(&offload_lock); 464 spin_lock(&offload_lock);
465 list_add_rcu(&po->list, head); 465 list_add_rcu(&po->list, head);
466 spin_unlock(&offload_lock); 466 spin_unlock(&offload_lock);
467 } 467 }
468 EXPORT_SYMBOL(dev_add_offload); 468 EXPORT_SYMBOL(dev_add_offload);
469 469
470 /** 470 /**
471 * __dev_remove_offload - remove offload handler 471 * __dev_remove_offload - remove offload handler
472 * @po: packet offload declaration 472 * @po: packet offload declaration
473 * 473 *
474 * Remove a protocol offload handler that was previously added to the 474 * Remove a protocol offload handler that was previously added to the
475 * kernel offload handlers by dev_add_offload(). The passed &offload_type 475 * kernel offload handlers by dev_add_offload(). The passed &offload_type
476 * is removed from the kernel lists and can be freed or reused once this 476 * is removed from the kernel lists and can be freed or reused once this
477 * function returns. 477 * function returns.
478 * 478 *
479 * The packet type might still be in use by receivers 479 * The packet type might still be in use by receivers
480 * and must not be freed until after all the CPU's have gone 480 * and must not be freed until after all the CPU's have gone
481 * through a quiescent state. 481 * through a quiescent state.
482 */ 482 */
483 void __dev_remove_offload(struct packet_offload *po) 483 void __dev_remove_offload(struct packet_offload *po)
484 { 484 {
485 struct list_head *head = &offload_base; 485 struct list_head *head = &offload_base;
486 struct packet_offload *po1; 486 struct packet_offload *po1;
487 487
488 spin_lock(&offload_lock); 488 spin_lock(&offload_lock);
489 489
490 list_for_each_entry(po1, head, list) { 490 list_for_each_entry(po1, head, list) {
491 if (po == po1) { 491 if (po == po1) {
492 list_del_rcu(&po->list); 492 list_del_rcu(&po->list);
493 goto out; 493 goto out;
494 } 494 }
495 } 495 }
496 496
497 pr_warn("dev_remove_offload: %p not found\n", po); 497 pr_warn("dev_remove_offload: %p not found\n", po);
498 out: 498 out:
499 spin_unlock(&offload_lock); 499 spin_unlock(&offload_lock);
500 } 500 }
501 EXPORT_SYMBOL(__dev_remove_offload); 501 EXPORT_SYMBOL(__dev_remove_offload);
502 502
503 /** 503 /**
504 * dev_remove_offload - remove packet offload handler 504 * dev_remove_offload - remove packet offload handler
505 * @po: packet offload declaration 505 * @po: packet offload declaration
506 * 506 *
507 * Remove a packet offload handler that was previously added to the kernel 507 * Remove a packet offload handler that was previously added to the kernel
508 * offload handlers by dev_add_offload(). The passed &offload_type is 508 * offload handlers by dev_add_offload(). The passed &offload_type is
509 * removed from the kernel lists and can be freed or reused once this 509 * removed from the kernel lists and can be freed or reused once this
510 * function returns. 510 * function returns.
511 * 511 *
512 * This call sleeps to guarantee that no CPU is looking at the packet 512 * This call sleeps to guarantee that no CPU is looking at the packet
513 * type after return. 513 * type after return.
514 */ 514 */
515 void dev_remove_offload(struct packet_offload *po) 515 void dev_remove_offload(struct packet_offload *po)
516 { 516 {
517 __dev_remove_offload(po); 517 __dev_remove_offload(po);
518 518
519 synchronize_net(); 519 synchronize_net();
520 } 520 }
521 EXPORT_SYMBOL(dev_remove_offload); 521 EXPORT_SYMBOL(dev_remove_offload);
522 522
523 /****************************************************************************** 523 /******************************************************************************
524 524
525 Device Boot-time Settings Routines 525 Device Boot-time Settings Routines
526 526
527 *******************************************************************************/ 527 *******************************************************************************/
528 528
529 /* Boot time configuration table */ 529 /* Boot time configuration table */
530 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; 530 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
531 531
532 /** 532 /**
533 * netdev_boot_setup_add - add new setup entry 533 * netdev_boot_setup_add - add new setup entry
534 * @name: name of the device 534 * @name: name of the device
535 * @map: configured settings for the device 535 * @map: configured settings for the device
536 * 536 *
537 * Adds new setup entry to the dev_boot_setup list. The function 537 * Adds new setup entry to the dev_boot_setup list. The function
538 * returns 0 on error and 1 on success. This is a generic routine to 538 * returns 0 on error and 1 on success. This is a generic routine to
539 * all netdevices. 539 * all netdevices.
540 */ 540 */
541 static int netdev_boot_setup_add(char *name, struct ifmap *map) 541 static int netdev_boot_setup_add(char *name, struct ifmap *map)
542 { 542 {
543 struct netdev_boot_setup *s; 543 struct netdev_boot_setup *s;
544 int i; 544 int i;
545 545
546 s = dev_boot_setup; 546 s = dev_boot_setup;
547 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { 547 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
548 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { 548 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
549 memset(s[i].name, 0, sizeof(s[i].name)); 549 memset(s[i].name, 0, sizeof(s[i].name));
550 strlcpy(s[i].name, name, IFNAMSIZ); 550 strlcpy(s[i].name, name, IFNAMSIZ);
551 memcpy(&s[i].map, map, sizeof(s[i].map)); 551 memcpy(&s[i].map, map, sizeof(s[i].map));
552 break; 552 break;
553 } 553 }
554 } 554 }
555 555
556 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; 556 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
557 } 557 }
558 558
559 /** 559 /**
560 * netdev_boot_setup_check - check boot time settings 560 * netdev_boot_setup_check - check boot time settings
561 * @dev: the netdevice 561 * @dev: the netdevice
562 * 562 *
563 * Check boot time settings for the device. 563 * Check boot time settings for the device.
564 * The found settings are set for the device to be used 564 * The found settings are set for the device to be used
565 * later in the device probing. 565 * later in the device probing.
566 * Returns 0 if no settings found, 1 if they are. 566 * Returns 0 if no settings found, 1 if they are.
567 */ 567 */
568 int netdev_boot_setup_check(struct net_device *dev) 568 int netdev_boot_setup_check(struct net_device *dev)
569 { 569 {
570 struct netdev_boot_setup *s = dev_boot_setup; 570 struct netdev_boot_setup *s = dev_boot_setup;
571 int i; 571 int i;
572 572
573 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { 573 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
574 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && 574 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
575 !strcmp(dev->name, s[i].name)) { 575 !strcmp(dev->name, s[i].name)) {
576 dev->irq = s[i].map.irq; 576 dev->irq = s[i].map.irq;
577 dev->base_addr = s[i].map.base_addr; 577 dev->base_addr = s[i].map.base_addr;
578 dev->mem_start = s[i].map.mem_start; 578 dev->mem_start = s[i].map.mem_start;
579 dev->mem_end = s[i].map.mem_end; 579 dev->mem_end = s[i].map.mem_end;
580 return 1; 580 return 1;
581 } 581 }
582 } 582 }
583 return 0; 583 return 0;
584 } 584 }
585 EXPORT_SYMBOL(netdev_boot_setup_check); 585 EXPORT_SYMBOL(netdev_boot_setup_check);
586 586
587 587
588 /** 588 /**
589 * netdev_boot_base - get address from boot time settings 589 * netdev_boot_base - get address from boot time settings
590 * @prefix: prefix for network device 590 * @prefix: prefix for network device
591 * @unit: id for network device 591 * @unit: id for network device
592 * 592 *
593 * Check boot time settings for the base address of device. 593 * Check boot time settings for the base address of device.
594 * The found settings are set for the device to be used 594 * The found settings are set for the device to be used
595 * later in the device probing. 595 * later in the device probing.
596 * Returns 0 if no settings found. 596 * Returns 0 if no settings found.
597 */ 597 */
598 unsigned long netdev_boot_base(const char *prefix, int unit) 598 unsigned long netdev_boot_base(const char *prefix, int unit)
599 { 599 {
600 const struct netdev_boot_setup *s = dev_boot_setup; 600 const struct netdev_boot_setup *s = dev_boot_setup;
601 char name[IFNAMSIZ]; 601 char name[IFNAMSIZ];
602 int i; 602 int i;
603 603
604 sprintf(name, "%s%d", prefix, unit); 604 sprintf(name, "%s%d", prefix, unit);
605 605
606 /* 606 /*
607 * If device already registered then return base of 1 607 * If device already registered then return base of 1
608 * to indicate not to probe for this interface 608 * to indicate not to probe for this interface
609 */ 609 */
610 if (__dev_get_by_name(&init_net, name)) 610 if (__dev_get_by_name(&init_net, name))
611 return 1; 611 return 1;
612 612
613 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) 613 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
614 if (!strcmp(name, s[i].name)) 614 if (!strcmp(name, s[i].name))
615 return s[i].map.base_addr; 615 return s[i].map.base_addr;
616 return 0; 616 return 0;
617 } 617 }
618 618
619 /* 619 /*
620 * Saves at boot time configured settings for any netdevice. 620 * Saves at boot time configured settings for any netdevice.
621 */ 621 */
622 int __init netdev_boot_setup(char *str) 622 int __init netdev_boot_setup(char *str)
623 { 623 {
624 int ints[5]; 624 int ints[5];
625 struct ifmap map; 625 struct ifmap map;
626 626
627 str = get_options(str, ARRAY_SIZE(ints), ints); 627 str = get_options(str, ARRAY_SIZE(ints), ints);
628 if (!str || !*str) 628 if (!str || !*str)
629 return 0; 629 return 0;
630 630
631 /* Save settings */ 631 /* Save settings */
632 memset(&map, 0, sizeof(map)); 632 memset(&map, 0, sizeof(map));
633 if (ints[0] > 0) 633 if (ints[0] > 0)
634 map.irq = ints[1]; 634 map.irq = ints[1];
635 if (ints[0] > 1) 635 if (ints[0] > 1)
636 map.base_addr = ints[2]; 636 map.base_addr = ints[2];
637 if (ints[0] > 2) 637 if (ints[0] > 2)
638 map.mem_start = ints[3]; 638 map.mem_start = ints[3];
639 if (ints[0] > 3) 639 if (ints[0] > 3)
640 map.mem_end = ints[4]; 640 map.mem_end = ints[4];
641 641
642 /* Add new entry to the list */ 642 /* Add new entry to the list */
643 return netdev_boot_setup_add(str, &map); 643 return netdev_boot_setup_add(str, &map);
644 } 644 }
645 645
646 __setup("netdev=", netdev_boot_setup); 646 __setup("netdev=", netdev_boot_setup);
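
Reading netdev_boot_setup() together with netdev_boot_setup_add() above gives the shape of the accepted kernel command-line option: up to four integers (irq, base I/O address, memory start, memory end) followed by the interface name. For example (values are illustrative only):

    netdev=9,0x300,0,0,eth0

A later netdev_boot_setup_check() for a device named eth0 would then copy irq 9 and I/O base 0x300 into that device before probing.
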
647 647
648 /******************************************************************************* 648 /*******************************************************************************
649 649
650 Device Interface Subroutines 650 Device Interface Subroutines
651 651
652 *******************************************************************************/ 652 *******************************************************************************/
653 653
654 /** 654 /**
655 * __dev_get_by_name - find a device by its name 655 * __dev_get_by_name - find a device by its name
656 * @net: the applicable net namespace 656 * @net: the applicable net namespace
657 * @name: name to find 657 * @name: name to find
658 * 658 *
659 * Find an interface by name. Must be called under RTNL semaphore 659 * Find an interface by name. Must be called under RTNL semaphore
660 * or @dev_base_lock. If the name is found a pointer to the device 660 * or @dev_base_lock. If the name is found a pointer to the device
661 * is returned. If the name is not found then %NULL is returned. The 661 * is returned. If the name is not found then %NULL is returned. The
662 * reference counters are not incremented so the caller must be 662 * reference counters are not incremented so the caller must be
663 * careful with locks. 663 * careful with locks.
664 */ 664 */
665 665
666 struct net_device *__dev_get_by_name(struct net *net, const char *name) 666 struct net_device *__dev_get_by_name(struct net *net, const char *name)
667 { 667 {
668 struct net_device *dev; 668 struct net_device *dev;
669 struct hlist_head *head = dev_name_hash(net, name); 669 struct hlist_head *head = dev_name_hash(net, name);
670 670
671 hlist_for_each_entry(dev, head, name_hlist) 671 hlist_for_each_entry(dev, head, name_hlist)
672 if (!strncmp(dev->name, name, IFNAMSIZ)) 672 if (!strncmp(dev->name, name, IFNAMSIZ))
673 return dev; 673 return dev;
674 674
675 return NULL; 675 return NULL;
676 } 676 }
677 EXPORT_SYMBOL(__dev_get_by_name); 677 EXPORT_SYMBOL(__dev_get_by_name);
678 678
679 /** 679 /**
680 * dev_get_by_name_rcu - find a device by its name 680 * dev_get_by_name_rcu - find a device by its name
681 * @net: the applicable net namespace 681 * @net: the applicable net namespace
682 * @name: name to find 682 * @name: name to find
683 * 683 *
684 * Find an interface by name. 684 * Find an interface by name.
685 * If the name is found a pointer to the device is returned. 685 * If the name is found a pointer to the device is returned.
686 * If the name is not found then %NULL is returned. 686 * If the name is not found then %NULL is returned.
687 * The reference counters are not incremented so the caller must be 687 * The reference counters are not incremented so the caller must be
688 * careful with locks. The caller must hold RCU lock. 688 * careful with locks. The caller must hold RCU lock.
689 */ 689 */
690 690
691 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) 691 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
692 { 692 {
693 struct net_device *dev; 693 struct net_device *dev;
694 struct hlist_head *head = dev_name_hash(net, name); 694 struct hlist_head *head = dev_name_hash(net, name);
695 695
696 hlist_for_each_entry_rcu(dev, head, name_hlist) 696 hlist_for_each_entry_rcu(dev, head, name_hlist)
697 if (!strncmp(dev->name, name, IFNAMSIZ)) 697 if (!strncmp(dev->name, name, IFNAMSIZ))
698 return dev; 698 return dev;
699 699
700 return NULL; 700 return NULL;
701 } 701 }
702 EXPORT_SYMBOL(dev_get_by_name_rcu); 702 EXPORT_SYMBOL(dev_get_by_name_rcu);
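
Because this variant takes no reference, the returned pointer is only guaranteed to remain valid inside the RCU read-side critical section. A minimal usage sketch under that assumption (function name is hypothetical):

    static int my_ifindex_by_name(struct net *net, const char *name)
    {
            struct net_device *dev;
            int ifindex = 0;

            rcu_read_lock();
            dev = dev_get_by_name_rcu(net, name);
            if (dev)
                    ifindex = dev->ifindex; /* valid only inside the RCU section */
            rcu_read_unlock();

            return ifindex;
    }
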
703 703
704 /** 704 /**
705 * dev_get_by_name - find a device by its name 705 * dev_get_by_name - find a device by its name
706 * @net: the applicable net namespace 706 * @net: the applicable net namespace
707 * @name: name to find 707 * @name: name to find
708 * 708 *
709 * Find an interface by name. This can be called from any 709 * Find an interface by name. This can be called from any
710 * context and does its own locking. The returned handle has 710 * context and does its own locking. The returned handle has
711 * the usage count incremented and the caller must use dev_put() to 711 * the usage count incremented and the caller must use dev_put() to
712 * release it when it is no longer needed. %NULL is returned if no 712 * release it when it is no longer needed. %NULL is returned if no
713 * matching device is found. 713 * matching device is found.
714 */ 714 */
715 715
716 struct net_device *dev_get_by_name(struct net *net, const char *name) 716 struct net_device *dev_get_by_name(struct net *net, const char *name)
717 { 717 {
718 struct net_device *dev; 718 struct net_device *dev;
719 719
720 rcu_read_lock(); 720 rcu_read_lock();
721 dev = dev_get_by_name_rcu(net, name); 721 dev = dev_get_by_name_rcu(net, name);
722 if (dev) 722 if (dev)
723 dev_hold(dev); 723 dev_hold(dev);
724 rcu_read_unlock(); 724 rcu_read_unlock();
725 return dev; 725 return dev;
726 } 726 }
727 EXPORT_SYMBOL(dev_get_by_name); 727 EXPORT_SYMBOL(dev_get_by_name);
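
The refcounted variant is the one to use when the device must outlive any single RCU section; the pattern is always lookup, use, then dev_put(). A hedged sketch with a hypothetical caller:

    static void my_use_device(struct net *net, const char *name)
    {
            struct net_device *dev = dev_get_by_name(net, name);

            if (!dev)
                    return;

            pr_info("%s: mtu %u\n", dev->name, dev->mtu);

            dev_put(dev);           /* drop the reference taken by dev_get_by_name() */
    }
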
728 728
729 /** 729 /**
730 * __dev_get_by_index - find a device by its ifindex 730 * __dev_get_by_index - find a device by its ifindex
731 * @net: the applicable net namespace 731 * @net: the applicable net namespace
732 * @ifindex: index of device 732 * @ifindex: index of device
733 * 733 *
734 * Search for an interface by index. Returns %NULL if the device 734 * Search for an interface by index. Returns %NULL if the device
735 * is not found or a pointer to the device. The device has not 735 * is not found or a pointer to the device. The device has not
736 * had its reference counter increased so the caller must be careful 736 * had its reference counter increased so the caller must be careful
737 * about locking. The caller must hold either the RTNL semaphore 737 * about locking. The caller must hold either the RTNL semaphore
738 * or @dev_base_lock. 738 * or @dev_base_lock.
739 */ 739 */
740 740
741 struct net_device *__dev_get_by_index(struct net *net, int ifindex) 741 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
742 { 742 {
743 struct net_device *dev; 743 struct net_device *dev;
744 struct hlist_head *head = dev_index_hash(net, ifindex); 744 struct hlist_head *head = dev_index_hash(net, ifindex);
745 745
746 hlist_for_each_entry(dev, head, index_hlist) 746 hlist_for_each_entry(dev, head, index_hlist)
747 if (dev->ifindex == ifindex) 747 if (dev->ifindex == ifindex)
748 return dev; 748 return dev;
749 749
750 return NULL; 750 return NULL;
751 } 751 }
752 EXPORT_SYMBOL(__dev_get_by_index); 752 EXPORT_SYMBOL(__dev_get_by_index);
753 753
754 /** 754 /**
755 * dev_get_by_index_rcu - find a device by its ifindex 755 * dev_get_by_index_rcu - find a device by its ifindex
756 * @net: the applicable net namespace 756 * @net: the applicable net namespace
757 * @ifindex: index of device 757 * @ifindex: index of device
758 * 758 *
759 * Search for an interface by index. Returns %NULL if the device 759 * Search for an interface by index. Returns %NULL if the device
760 * is not found or a pointer to the device. The device has not 760 * is not found or a pointer to the device. The device has not
761 * had its reference counter increased so the caller must be careful 761 * had its reference counter increased so the caller must be careful
762 * about locking. The caller must hold RCU lock. 762 * about locking. The caller must hold RCU lock.
763 */ 763 */
764 764
765 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) 765 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
766 { 766 {
767 struct net_device *dev; 767 struct net_device *dev;
768 struct hlist_head *head = dev_index_hash(net, ifindex); 768 struct hlist_head *head = dev_index_hash(net, ifindex);
769 769
770 hlist_for_each_entry_rcu(dev, head, index_hlist) 770 hlist_for_each_entry_rcu(dev, head, index_hlist)
771 if (dev->ifindex == ifindex) 771 if (dev->ifindex == ifindex)
772 return dev; 772 return dev;
773 773
774 return NULL; 774 return NULL;
775 } 775 }
776 EXPORT_SYMBOL(dev_get_by_index_rcu); 776 EXPORT_SYMBOL(dev_get_by_index_rcu);
777 777
778 778
779 /** 779 /**
780 * dev_get_by_index - find a device by its ifindex 780 * dev_get_by_index - find a device by its ifindex
781 * @net: the applicable net namespace 781 * @net: the applicable net namespace
782 * @ifindex: index of device 782 * @ifindex: index of device
783 * 783 *
784 * Search for an interface by index. Returns NULL if the device 784 * Search for an interface by index. Returns NULL if the device
785 * is not found or a pointer to the device. The device returned has 785 * is not found or a pointer to the device. The device returned has
786 * had a reference added and the pointer is safe until the user calls 786 * had a reference added and the pointer is safe until the user calls
787 * dev_put to indicate they have finished with it. 787 * dev_put to indicate they have finished with it.
788 */ 788 */
789 789
790 struct net_device *dev_get_by_index(struct net *net, int ifindex) 790 struct net_device *dev_get_by_index(struct net *net, int ifindex)
791 { 791 {
792 struct net_device *dev; 792 struct net_device *dev;
793 793
794 rcu_read_lock(); 794 rcu_read_lock();
795 dev = dev_get_by_index_rcu(net, ifindex); 795 dev = dev_get_by_index_rcu(net, ifindex);
796 if (dev) 796 if (dev)
797 dev_hold(dev); 797 dev_hold(dev);
798 rcu_read_unlock(); 798 rcu_read_unlock();
799 return dev; 799 return dev;
800 } 800 }
801 EXPORT_SYMBOL(dev_get_by_index); 801 EXPORT_SYMBOL(dev_get_by_index);
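/*
 * Editor's note: illustrative sketch, not part of this commit. It shows the
 * refcounted dev_get_by_index() in contrast to __dev_get_by_index(), which
 * (per its kernel-doc) relies on the caller holding RTNL or dev_base_lock.
 * init_net is an example assumption.
 */
static void example_lookup_by_index(int ifindex)
{
        struct net_device *dev = dev_get_by_index(&init_net, ifindex);

        if (!dev)
                return;
        pr_info("ifindex %d is %s\n", ifindex, dev->name);
        dev_put(dev);
}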
802 802
803 /** 803 /**
804 * netdev_get_name - get a netdevice name, knowing its ifindex. 804 * netdev_get_name - get a netdevice name, knowing its ifindex.
805 * @net: network namespace 805 * @net: network namespace
806 * @name: a pointer to the buffer where the name will be stored. 806 * @name: a pointer to the buffer where the name will be stored.
807 * @ifindex: the ifindex of the interface to get the name from. 807 * @ifindex: the ifindex of the interface to get the name from.
808 * 808 *
809 * The use of raw_seqcount_begin() and cond_resched() before 809 * The use of raw_seqcount_begin() and cond_resched() before
810 * retrying is required as we want to give the writers a chance 810 * retrying is required as we want to give the writers a chance
811 * to complete when CONFIG_PREEMPT is not set. 811 * to complete when CONFIG_PREEMPT is not set.
812 */ 812 */
813 int netdev_get_name(struct net *net, char *name, int ifindex) 813 int netdev_get_name(struct net *net, char *name, int ifindex)
814 { 814 {
815 struct net_device *dev; 815 struct net_device *dev;
816 unsigned int seq; 816 unsigned int seq;
817 817
818 retry: 818 retry:
819 seq = raw_seqcount_begin(&devnet_rename_seq); 819 seq = raw_seqcount_begin(&devnet_rename_seq);
820 rcu_read_lock(); 820 rcu_read_lock();
821 dev = dev_get_by_index_rcu(net, ifindex); 821 dev = dev_get_by_index_rcu(net, ifindex);
822 if (!dev) { 822 if (!dev) {
823 rcu_read_unlock(); 823 rcu_read_unlock();
824 return -ENODEV; 824 return -ENODEV;
825 } 825 }
826 826
827 strcpy(name, dev->name); 827 strcpy(name, dev->name);
828 rcu_read_unlock(); 828 rcu_read_unlock();
829 if (read_seqcount_retry(&devnet_rename_seq, seq)) { 829 if (read_seqcount_retry(&devnet_rename_seq, seq)) {
830 cond_resched(); 830 cond_resched();
831 goto retry; 831 goto retry;
832 } 832 }
833 833
834 return 0; 834 return 0;
835 } 835 }
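/*
 * Editor's note: illustrative sketch, not part of this commit.
 * netdev_get_name() copies the name into a caller-supplied IFNAMSIZ buffer
 * and retries internally if a rename races with the lookup, so the caller
 * needs no locking of its own.
 */
static void example_print_name(struct net *net, int ifindex)
{
        char name[IFNAMSIZ];

        if (netdev_get_name(net, name, ifindex) == 0)
                pr_info("ifindex %d -> %s\n", ifindex, name);
}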
836 836
837 /** 837 /**
838 * dev_getbyhwaddr_rcu - find a device by its hardware address 838 * dev_getbyhwaddr_rcu - find a device by its hardware address
839 * @net: the applicable net namespace 839 * @net: the applicable net namespace
840 * @type: media type of device 840 * @type: media type of device
841 * @ha: hardware address 841 * @ha: hardware address
842 * 842 *
843 * Search for an interface by MAC address. Returns NULL if the device 843 * Search for an interface by MAC address. Returns NULL if the device
844 * is not found or a pointer to the device. 844 * is not found or a pointer to the device.
845 * The caller must hold RCU or RTNL. 845 * The caller must hold RCU or RTNL.
846 * The returned device has not had its ref count increased 846 * The returned device has not had its ref count increased
847 * and the caller must therefore be careful about locking 847 * and the caller must therefore be careful about locking
848 * 848 *
849 */ 849 */
850 850
851 struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, 851 struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
852 const char *ha) 852 const char *ha)
853 { 853 {
854 struct net_device *dev; 854 struct net_device *dev;
855 855
856 for_each_netdev_rcu(net, dev) 856 for_each_netdev_rcu(net, dev)
857 if (dev->type == type && 857 if (dev->type == type &&
858 !memcmp(dev->dev_addr, ha, dev->addr_len)) 858 !memcmp(dev->dev_addr, ha, dev->addr_len))
859 return dev; 859 return dev;
860 860
861 return NULL; 861 return NULL;
862 } 862 }
863 EXPORT_SYMBOL(dev_getbyhwaddr_rcu); 863 EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
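/*
 * Editor's note: illustrative sketch, not part of this commit. As the
 * kernel-doc above states, the caller must hold RCU (or RTNL) and the
 * returned pointer is not refcounted. The Ethernet type and the sample
 * address are example assumptions.
 */
static void example_find_by_mac(struct net *net)
{
        static const char mac[ETH_ALEN] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
        struct net_device *dev;

        rcu_read_lock();
        dev = dev_getbyhwaddr_rcu(net, ARPHRD_ETHER, mac);
        if (dev)
                pr_info("MAC matches %s\n", dev->name);
        rcu_read_unlock();
}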
864 864
865 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) 865 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
866 { 866 {
867 struct net_device *dev; 867 struct net_device *dev;
868 868
869 ASSERT_RTNL(); 869 ASSERT_RTNL();
870 for_each_netdev(net, dev) 870 for_each_netdev(net, dev)
871 if (dev->type == type) 871 if (dev->type == type)
872 return dev; 872 return dev;
873 873
874 return NULL; 874 return NULL;
875 } 875 }
876 EXPORT_SYMBOL(__dev_getfirstbyhwtype); 876 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
877 877
878 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) 878 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
879 { 879 {
880 struct net_device *dev, *ret = NULL; 880 struct net_device *dev, *ret = NULL;
881 881
882 rcu_read_lock(); 882 rcu_read_lock();
883 for_each_netdev_rcu(net, dev) 883 for_each_netdev_rcu(net, dev)
884 if (dev->type == type) { 884 if (dev->type == type) {
885 dev_hold(dev); 885 dev_hold(dev);
886 ret = dev; 886 ret = dev;
887 break; 887 break;
888 } 888 }
889 rcu_read_unlock(); 889 rcu_read_unlock();
890 return ret; 890 return ret;
891 } 891 }
892 EXPORT_SYMBOL(dev_getfirstbyhwtype); 892 EXPORT_SYMBOL(dev_getfirstbyhwtype);
893 893
894 /** 894 /**
895 * dev_get_by_flags_rcu - find any device with given flags 895 * dev_get_by_flags_rcu - find any device with given flags
896 * @net: the applicable net namespace 896 * @net: the applicable net namespace
897 * @if_flags: IFF_* values 897 * @if_flags: IFF_* values
898 * @mask: bitmask of bits in if_flags to check 898 * @mask: bitmask of bits in if_flags to check
899 * 899 *
900 * Search for any interface with the given flags. Returns NULL if a device 900 * Search for any interface with the given flags. Returns NULL if a device
901 * is not found or a pointer to the device. Must be called inside 901 * is not found or a pointer to the device. Must be called inside
902 * rcu_read_lock(), and result refcount is unchanged. 902 * rcu_read_lock(), and result refcount is unchanged.
903 */ 903 */
904 904
905 struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags, 905 struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
906 unsigned short mask) 906 unsigned short mask)
907 { 907 {
908 struct net_device *dev, *ret; 908 struct net_device *dev, *ret;
909 909
910 ret = NULL; 910 ret = NULL;
911 for_each_netdev_rcu(net, dev) { 911 for_each_netdev_rcu(net, dev) {
912 if (((dev->flags ^ if_flags) & mask) == 0) { 912 if (((dev->flags ^ if_flags) & mask) == 0) {
913 ret = dev; 913 ret = dev;
914 break; 914 break;
915 } 915 }
916 } 916 }
917 return ret; 917 return ret;
918 } 918 }
919 EXPORT_SYMBOL(dev_get_by_flags_rcu); 919 EXPORT_SYMBOL(dev_get_by_flags_rcu);
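/*
 * Editor's note: illustrative sketch, not part of this commit. It follows the
 * documented contract of dev_get_by_flags_rcu(): call inside rcu_read_lock()
 * and do not rely on the result once the lock is dropped, since the refcount
 * is untouched. Searching for a loopback device is an example assumption.
 */
static bool example_has_loopback(struct net *net)
{
        struct net_device *dev;
        bool found;

        rcu_read_lock();
        dev = dev_get_by_flags_rcu(net, IFF_LOOPBACK, IFF_LOOPBACK);
        found = dev != NULL;
        rcu_read_unlock();

        return found;
}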
920 920
921 /** 921 /**
922 * dev_valid_name - check if name is okay for network device 922 * dev_valid_name - check if name is okay for network device
923 * @name: name string 923 * @name: name string
924 * 924 *
925 * Network device names need to be valid file names 925 * Network device names need to be valid file names
926 * to allow sysfs to work. We also disallow any kind of 926 * to allow sysfs to work. We also disallow any kind of
927 * whitespace. 927 * whitespace.
928 */ 928 */
929 bool dev_valid_name(const char *name) 929 bool dev_valid_name(const char *name)
930 { 930 {
931 if (*name == '\0') 931 if (*name == '\0')
932 return false; 932 return false;
933 if (strlen(name) >= IFNAMSIZ) 933 if (strlen(name) >= IFNAMSIZ)
934 return false; 934 return false;
935 if (!strcmp(name, ".") || !strcmp(name, "..")) 935 if (!strcmp(name, ".") || !strcmp(name, ".."))
936 return false; 936 return false;
937 937
938 while (*name) { 938 while (*name) {
939 if (*name == '/' || isspace(*name)) 939 if (*name == '/' || isspace(*name))
940 return false; 940 return false;
941 name++; 941 name++;
942 } 942 }
943 return true; 943 return true;
944 } 944 }
945 EXPORT_SYMBOL(dev_valid_name); 945 EXPORT_SYMBOL(dev_valid_name);
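/*
 * Editor's note: illustrative sketch, not part of this commit. It exercises
 * the rules dev_valid_name() implements above: empty names, names of
 * IFNAMSIZ or more characters, ".", "..", '/' and whitespace are rejected.
 */
static void example_check_names(void)
{
        pr_info("\"uplink0\": %d\n", dev_valid_name("uplink0"));        /* true */
        pr_info("\"bad name\": %d\n", dev_valid_name("bad name"));      /* false: space */
        pr_info("\"..\": %d\n", dev_valid_name(".."));                  /* false */
}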
946 946
947 /** 947 /**
948 * __dev_alloc_name - allocate a name for a device 948 * __dev_alloc_name - allocate a name for a device
949 * @net: network namespace to allocate the device name in 949 * @net: network namespace to allocate the device name in
950 * @name: name format string 950 * @name: name format string
951 * @buf: scratch buffer and result name string 951 * @buf: scratch buffer and result name string
952 * 952 *
953 * Passed a format string - eg "lt%d" it will try and find a suitable 953 * Passed a format string - eg "lt%d" it will try and find a suitable
954 * id. It scans list of devices to build up a free map, then chooses 954 * id. It scans list of devices to build up a free map, then chooses
955 * the first empty slot. The caller must hold the dev_base or rtnl lock 955 * the first empty slot. The caller must hold the dev_base or rtnl lock
956 * while allocating the name and adding the device in order to avoid 956 * while allocating the name and adding the device in order to avoid
957 * duplicates. 957 * duplicates.
958 * Limited to bits_per_byte * page size devices (ie 32K on most platforms). 958 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
959 * Returns the number of the unit assigned or a negative errno code. 959 * Returns the number of the unit assigned or a negative errno code.
960 */ 960 */
961 961
962 static int __dev_alloc_name(struct net *net, const char *name, char *buf) 962 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
963 { 963 {
964 int i = 0; 964 int i = 0;
965 const char *p; 965 const char *p;
966 const int max_netdevices = 8*PAGE_SIZE; 966 const int max_netdevices = 8*PAGE_SIZE;
967 unsigned long *inuse; 967 unsigned long *inuse;
968 struct net_device *d; 968 struct net_device *d;
969 969
970 p = strnchr(name, IFNAMSIZ-1, '%'); 970 p = strnchr(name, IFNAMSIZ-1, '%');
971 if (p) { 971 if (p) {
972 /* 972 /*
973 * Verify the string as this thing may have come from 973 * Verify the string as this thing may have come from
974 * the user. There must be either one "%d" and no other "%" 974 * the user. There must be either one "%d" and no other "%"
975 * characters. 975 * characters.
976 */ 976 */
977 if (p[1] != 'd' || strchr(p + 2, '%')) 977 if (p[1] != 'd' || strchr(p + 2, '%'))
978 return -EINVAL; 978 return -EINVAL;
979 979
980 /* Use one page as a bit array of possible slots */ 980 /* Use one page as a bit array of possible slots */
981 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC); 981 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
982 if (!inuse) 982 if (!inuse)
983 return -ENOMEM; 983 return -ENOMEM;
984 984
985 for_each_netdev(net, d) { 985 for_each_netdev(net, d) {
986 if (!sscanf(d->name, name, &i)) 986 if (!sscanf(d->name, name, &i))
987 continue; 987 continue;
988 if (i < 0 || i >= max_netdevices) 988 if (i < 0 || i >= max_netdevices)
989 continue; 989 continue;
990 990
991 /* avoid cases where sscanf is not exact inverse of printf */ 991 /* avoid cases where sscanf is not exact inverse of printf */
992 snprintf(buf, IFNAMSIZ, name, i); 992 snprintf(buf, IFNAMSIZ, name, i);
993 if (!strncmp(buf, d->name, IFNAMSIZ)) 993 if (!strncmp(buf, d->name, IFNAMSIZ))
994 set_bit(i, inuse); 994 set_bit(i, inuse);
995 } 995 }
996 996
997 i = find_first_zero_bit(inuse, max_netdevices); 997 i = find_first_zero_bit(inuse, max_netdevices);
998 free_page((unsigned long) inuse); 998 free_page((unsigned long) inuse);
999 } 999 }
1000 1000
1001 if (buf != name) 1001 if (buf != name)
1002 snprintf(buf, IFNAMSIZ, name, i); 1002 snprintf(buf, IFNAMSIZ, name, i);
1003 if (!__dev_get_by_name(net, buf)) 1003 if (!__dev_get_by_name(net, buf))
1004 return i; 1004 return i;
1005 1005
1006 /* It is possible to run out of possible slots 1006 /* It is possible to run out of possible slots
1007 * when the name is long and there isn't enough space left 1007 * when the name is long and there isn't enough space left
1008 * for the digits, or if all bits are used. 1008 * for the digits, or if all bits are used.
1009 */ 1009 */
1010 return -ENFILE; 1010 return -ENFILE;
1011 } 1011 }
1012 1012
1013 /** 1013 /**
1014 * dev_alloc_name - allocate a name for a device 1014 * dev_alloc_name - allocate a name for a device
1015 * @dev: device 1015 * @dev: device
1016 * @name: name format string 1016 * @name: name format string
1017 * 1017 *
1018 * Passed a format string - eg "lt%d" it will try and find a suitable 1018 * Passed a format string - eg "lt%d" it will try and find a suitable
1019 * id. It scans list of devices to build up a free map, then chooses 1019 * id. It scans list of devices to build up a free map, then chooses
1020 * the first empty slot. The caller must hold the dev_base or rtnl lock 1020 * the first empty slot. The caller must hold the dev_base or rtnl lock
1021 * while allocating the name and adding the device in order to avoid 1021 * while allocating the name and adding the device in order to avoid
1022 * duplicates. 1022 * duplicates.
1023 * Limited to bits_per_byte * page size devices (ie 32K on most platforms). 1023 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
1024 * Returns the number of the unit assigned or a negative errno code. 1024 * Returns the number of the unit assigned or a negative errno code.
1025 */ 1025 */
1026 1026
1027 int dev_alloc_name(struct net_device *dev, const char *name) 1027 int dev_alloc_name(struct net_device *dev, const char *name)
1028 { 1028 {
1029 char buf[IFNAMSIZ]; 1029 char buf[IFNAMSIZ];
1030 struct net *net; 1030 struct net *net;
1031 int ret; 1031 int ret;
1032 1032
1033 BUG_ON(!dev_net(dev)); 1033 BUG_ON(!dev_net(dev));
1034 net = dev_net(dev); 1034 net = dev_net(dev);
1035 ret = __dev_alloc_name(net, name, buf); 1035 ret = __dev_alloc_name(net, name, buf);
1036 if (ret >= 0) 1036 if (ret >= 0)
1037 strlcpy(dev->name, buf, IFNAMSIZ); 1037 strlcpy(dev->name, buf, IFNAMSIZ);
1038 return ret; 1038 return ret;
1039 } 1039 }
1040 EXPORT_SYMBOL(dev_alloc_name); 1040 EXPORT_SYMBOL(dev_alloc_name);
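/*
 * Editor's note: illustrative sketch, not part of this commit. A driver
 * registering a device can hand a "%d" pattern to dev_alloc_name(); the
 * "myeth%d" pattern and the surrounding helper are example assumptions.
 * As the kernel-doc above requires, the caller holds RTNL.
 */
static int example_name_device(struct net_device *dev)
{
        int unit;

        ASSERT_RTNL();
        unit = dev_alloc_name(dev, "myeth%d");  /* picks the first free myethN slot */
        if (unit < 0)
                return unit;                    /* -EINVAL or -ENFILE per the kernel-doc */
        pr_info("assigned %s (unit %d)\n", dev->name, unit);
        return 0;
}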
1041 1041
1042 static int dev_alloc_name_ns(struct net *net, 1042 static int dev_alloc_name_ns(struct net *net,
1043 struct net_device *dev, 1043 struct net_device *dev,
1044 const char *name) 1044 const char *name)
1045 { 1045 {
1046 char buf[IFNAMSIZ]; 1046 char buf[IFNAMSIZ];
1047 int ret; 1047 int ret;
1048 1048
1049 ret = __dev_alloc_name(net, name, buf); 1049 ret = __dev_alloc_name(net, name, buf);
1050 if (ret >= 0) 1050 if (ret >= 0)
1051 strlcpy(dev->name, buf, IFNAMSIZ); 1051 strlcpy(dev->name, buf, IFNAMSIZ);
1052 return ret; 1052 return ret;
1053 } 1053 }
1054 1054
1055 static int dev_get_valid_name(struct net *net, 1055 static int dev_get_valid_name(struct net *net,
1056 struct net_device *dev, 1056 struct net_device *dev,
1057 const char *name) 1057 const char *name)
1058 { 1058 {
1059 BUG_ON(!net); 1059 BUG_ON(!net);
1060 1060
1061 if (!dev_valid_name(name)) 1061 if (!dev_valid_name(name))
1062 return -EINVAL; 1062 return -EINVAL;
1063 1063
1064 if (strchr(name, '%')) 1064 if (strchr(name, '%'))
1065 return dev_alloc_name_ns(net, dev, name); 1065 return dev_alloc_name_ns(net, dev, name);
1066 else if (__dev_get_by_name(net, name)) 1066 else if (__dev_get_by_name(net, name))
1067 return -EEXIST; 1067 return -EEXIST;
1068 else if (dev->name != name) 1068 else if (dev->name != name)
1069 strlcpy(dev->name, name, IFNAMSIZ); 1069 strlcpy(dev->name, name, IFNAMSIZ);
1070 1070
1071 return 0; 1071 return 0;
1072 } 1072 }
1073 1073
1074 /** 1074 /**
1075 * dev_change_name - change name of a device 1075 * dev_change_name - change name of a device
1076 * @dev: device 1076 * @dev: device
1077 * @newname: name (or format string) must be at least IFNAMSIZ 1077 * @newname: name (or format string) must be at least IFNAMSIZ
1078 * 1078 *
1079 * Change name of a device, can pass format strings "eth%d" 1079 * Change name of a device, can pass format strings "eth%d"
1080 * for wildcarding. 1080 * for wildcarding.
1081 */ 1081 */
1082 int dev_change_name(struct net_device *dev, const char *newname) 1082 int dev_change_name(struct net_device *dev, const char *newname)
1083 { 1083 {
1084 char oldname[IFNAMSIZ]; 1084 char oldname[IFNAMSIZ];
1085 int err = 0; 1085 int err = 0;
1086 int ret; 1086 int ret;
1087 struct net *net; 1087 struct net *net;
1088 1088
1089 ASSERT_RTNL(); 1089 ASSERT_RTNL();
1090 BUG_ON(!dev_net(dev)); 1090 BUG_ON(!dev_net(dev));
1091 1091
1092 net = dev_net(dev); 1092 net = dev_net(dev);
1093 if (dev->flags & IFF_UP) 1093 if (dev->flags & IFF_UP)
1094 return -EBUSY; 1094 return -EBUSY;
1095 1095
1096 write_seqcount_begin(&devnet_rename_seq); 1096 write_seqcount_begin(&devnet_rename_seq);
1097 1097
1098 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { 1098 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
1099 write_seqcount_end(&devnet_rename_seq); 1099 write_seqcount_end(&devnet_rename_seq);
1100 return 0; 1100 return 0;
1101 } 1101 }
1102 1102
1103 memcpy(oldname, dev->name, IFNAMSIZ); 1103 memcpy(oldname, dev->name, IFNAMSIZ);
1104 1104
1105 err = dev_get_valid_name(net, dev, newname); 1105 err = dev_get_valid_name(net, dev, newname);
1106 if (err < 0) { 1106 if (err < 0) {
1107 write_seqcount_end(&devnet_rename_seq); 1107 write_seqcount_end(&devnet_rename_seq);
1108 return err; 1108 return err;
1109 } 1109 }
1110 1110
1111 rollback: 1111 rollback:
1112 ret = device_rename(&dev->dev, dev->name); 1112 ret = device_rename(&dev->dev, dev->name);
1113 if (ret) { 1113 if (ret) {
1114 memcpy(dev->name, oldname, IFNAMSIZ); 1114 memcpy(dev->name, oldname, IFNAMSIZ);
1115 write_seqcount_end(&devnet_rename_seq); 1115 write_seqcount_end(&devnet_rename_seq);
1116 return ret; 1116 return ret;
1117 } 1117 }
1118 1118
1119 write_seqcount_end(&devnet_rename_seq); 1119 write_seqcount_end(&devnet_rename_seq);
1120 1120
1121 write_lock_bh(&dev_base_lock); 1121 write_lock_bh(&dev_base_lock);
1122 hlist_del_rcu(&dev->name_hlist); 1122 hlist_del_rcu(&dev->name_hlist);
1123 write_unlock_bh(&dev_base_lock); 1123 write_unlock_bh(&dev_base_lock);
1124 1124
1125 synchronize_rcu(); 1125 synchronize_rcu();
1126 1126
1127 write_lock_bh(&dev_base_lock); 1127 write_lock_bh(&dev_base_lock);
1128 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); 1128 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
1129 write_unlock_bh(&dev_base_lock); 1129 write_unlock_bh(&dev_base_lock);
1130 1130
1131 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); 1131 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1132 ret = notifier_to_errno(ret); 1132 ret = notifier_to_errno(ret);
1133 1133
1134 if (ret) { 1134 if (ret) {
1135 /* err >= 0 after dev_alloc_name() or stores the first errno */ 1135 /* err >= 0 after dev_alloc_name() or stores the first errno */
1136 if (err >= 0) { 1136 if (err >= 0) {
1137 err = ret; 1137 err = ret;
1138 write_seqcount_begin(&devnet_rename_seq); 1138 write_seqcount_begin(&devnet_rename_seq);
1139 memcpy(dev->name, oldname, IFNAMSIZ); 1139 memcpy(dev->name, oldname, IFNAMSIZ);
1140 goto rollback; 1140 goto rollback;
1141 } else { 1141 } else {
1142 pr_err("%s: name change rollback failed: %d\n", 1142 pr_err("%s: name change rollback failed: %d\n",
1143 dev->name, ret); 1143 dev->name, ret);
1144 } 1144 }
1145 } 1145 }
1146 1146
1147 return err; 1147 return err;
1148 } 1148 }
1149 1149
1150 /** 1150 /**
1151 * dev_set_alias - change ifalias of a device 1151 * dev_set_alias - change ifalias of a device
1152 * @dev: device 1152 * @dev: device
1153 * @alias: name up to IFALIASZ 1153 * @alias: name up to IFALIASZ
1154 * @len: limit of bytes to copy from info 1154 * @len: limit of bytes to copy from info
1155 * 1155 *
1156 * Set ifalias for a device. 1156 * Set ifalias for a device.
1157 */ 1157 */
1158 int dev_set_alias(struct net_device *dev, const char *alias, size_t len) 1158 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1159 { 1159 {
1160 char *new_ifalias; 1160 char *new_ifalias;
1161 1161
1162 ASSERT_RTNL(); 1162 ASSERT_RTNL();
1163 1163
1164 if (len >= IFALIASZ) 1164 if (len >= IFALIASZ)
1165 return -EINVAL; 1165 return -EINVAL;
1166 1166
1167 if (!len) { 1167 if (!len) {
1168 kfree(dev->ifalias); 1168 kfree(dev->ifalias);
1169 dev->ifalias = NULL; 1169 dev->ifalias = NULL;
1170 return 0; 1170 return 0;
1171 } 1171 }
1172 1172
1173 new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); 1173 new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
1174 if (!new_ifalias) 1174 if (!new_ifalias)
1175 return -ENOMEM; 1175 return -ENOMEM;
1176 dev->ifalias = new_ifalias; 1176 dev->ifalias = new_ifalias;
1177 1177
1178 strlcpy(dev->ifalias, alias, len+1); 1178 strlcpy(dev->ifalias, alias, len+1);
1179 return len; 1179 return len;
1180 } 1180 }
1181 1181
1182 1182
1183 /** 1183 /**
1184 * netdev_features_change - device changes features 1184 * netdev_features_change - device changes features
1185 * @dev: device to cause notification 1185 * @dev: device to cause notification
1186 * 1186 *
1187 * Called to indicate a device has changed features. 1187 * Called to indicate a device has changed features.
1188 */ 1188 */
1189 void netdev_features_change(struct net_device *dev) 1189 void netdev_features_change(struct net_device *dev)
1190 { 1190 {
1191 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev); 1191 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1192 } 1192 }
1193 EXPORT_SYMBOL(netdev_features_change); 1193 EXPORT_SYMBOL(netdev_features_change);
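/*
 * Editor's note: illustrative sketch, not part of this commit. A driver whose
 * offload capabilities change at runtime (firmware reconfiguration is the
 * assumed trigger here) reports that via netdev_features_change(), which
 * fires the NETDEV_FEAT_CHANGE notifier chain shown above.
 */
static void example_offloads_changed(struct net_device *dev)
{
        netdev_features_change(dev);
}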
1194 1194
1195 /** 1195 /**
1196 * netdev_state_change - device changes state 1196 * netdev_state_change - device changes state
1197 * @dev: device to cause notification 1197 * @dev: device to cause notification
1198 * 1198 *
1199 * Called to indicate a device has changed state. This function calls 1199 * Called to indicate a device has changed state. This function calls
1200 * the notifier chains for netdev_chain and sends a NEWLINK message 1200 * the notifier chains for netdev_chain and sends a NEWLINK message
1201 * to the routing socket. 1201 * to the routing socket.
1202 */ 1202 */
1203 void netdev_state_change(struct net_device *dev) 1203 void netdev_state_change(struct net_device *dev)
1204 { 1204 {
1205 if (dev->flags & IFF_UP) { 1205 if (dev->flags & IFF_UP) {
1206 call_netdevice_notifiers(NETDEV_CHANGE, dev); 1206 call_netdevice_notifiers(NETDEV_CHANGE, dev);
1207 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); 1207 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
1208 } 1208 }
1209 } 1209 }
1210 EXPORT_SYMBOL(netdev_state_change); 1210 EXPORT_SYMBOL(netdev_state_change);
1211 1211
1212 /** 1212 /**
1213 * netdev_notify_peers - notify network peers about existence of @dev 1213 * netdev_notify_peers - notify network peers about existence of @dev
1214 * @dev: network device 1214 * @dev: network device
1215 * 1215 *
1216 * Generate traffic such that interested network peers are aware of 1216 * Generate traffic such that interested network peers are aware of
1217 * @dev, such as by generating a gratuitous ARP. This may be used when 1217 * @dev, such as by generating a gratuitous ARP. This may be used when
1218 * a device wants to inform the rest of the network about some sort of 1218 * a device wants to inform the rest of the network about some sort of
1219 * reconfiguration such as a failover event or virtual machine 1219 * reconfiguration such as a failover event or virtual machine
1220 * migration. 1220 * migration.
1221 */ 1221 */
1222 void netdev_notify_peers(struct net_device *dev) 1222 void netdev_notify_peers(struct net_device *dev)
1223 { 1223 {
1224 rtnl_lock(); 1224 rtnl_lock();
1225 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); 1225 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
1226 rtnl_unlock(); 1226 rtnl_unlock();
1227 } 1227 }
1228 EXPORT_SYMBOL(netdev_notify_peers); 1228 EXPORT_SYMBOL(netdev_notify_peers);
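/*
 * Editor's note: illustrative sketch, not part of this commit. A failover or
 * migration handler (hypothetical here) calls netdev_notify_peers() on the
 * newly active device so peers relearn its location, e.g. via gratuitous ARP.
 * The function takes rtnl_lock() itself, so it must not be called with RTNL
 * already held.
 */
static void example_after_failover(struct net_device *active)
{
        netdev_notify_peers(active);
}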
1229 1229
1230 static int __dev_open(struct net_device *dev) 1230 static int __dev_open(struct net_device *dev)
1231 { 1231 {
1232 const struct net_device_ops *ops = dev->netdev_ops; 1232 const struct net_device_ops *ops = dev->netdev_ops;
1233 int ret; 1233 int ret;
1234 1234
1235 ASSERT_RTNL(); 1235 ASSERT_RTNL();
1236 1236
1237 if (!netif_device_present(dev)) 1237 if (!netif_device_present(dev))
1238 return -ENODEV; 1238 return -ENODEV;
1239 1239
1240 /* Block netpoll from trying to do any rx path servicing. 1240 /* Block netpoll from trying to do any rx path servicing.
1241 * If we don't do this there is a chance ndo_poll_controller 1241 * If we don't do this there is a chance ndo_poll_controller
1242 * or ndo_poll may be running while we open the device 1242 * or ndo_poll may be running while we open the device
1243 */ 1243 */
1244 netpoll_rx_disable(dev); 1244 netpoll_rx_disable(dev);
1245 1245
1246 ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); 1246 ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1247 ret = notifier_to_errno(ret); 1247 ret = notifier_to_errno(ret);
1248 if (ret) 1248 if (ret)
1249 return ret; 1249 return ret;
1250 1250
1251 set_bit(__LINK_STATE_START, &dev->state); 1251 set_bit(__LINK_STATE_START, &dev->state);
1252 1252
1253 if (ops->ndo_validate_addr) 1253 if (ops->ndo_validate_addr)
1254 ret = ops->ndo_validate_addr(dev); 1254 ret = ops->ndo_validate_addr(dev);
1255 1255
1256 if (!ret && ops->ndo_open) 1256 if (!ret && ops->ndo_open)
1257 ret = ops->ndo_open(dev); 1257 ret = ops->ndo_open(dev);
1258 1258
1259 netpoll_rx_enable(dev); 1259 netpoll_rx_enable(dev);
1260 1260
1261 if (ret) 1261 if (ret)
1262 clear_bit(__LINK_STATE_START, &dev->state); 1262 clear_bit(__LINK_STATE_START, &dev->state);
1263 else { 1263 else {
1264 dev->flags |= IFF_UP; 1264 dev->flags |= IFF_UP;
1265 net_dmaengine_get(); 1265 net_dmaengine_get();
1266 dev_set_rx_mode(dev); 1266 dev_set_rx_mode(dev);
1267 dev_activate(dev); 1267 dev_activate(dev);
1268 add_device_randomness(dev->dev_addr, dev->addr_len); 1268 add_device_randomness(dev->dev_addr, dev->addr_len);
1269 } 1269 }
1270 1270
1271 return ret; 1271 return ret;
1272 } 1272 }
1273 1273
1274 /** 1274 /**
1275 * dev_open - prepare an interface for use. 1275 * dev_open - prepare an interface for use.
1276 * @dev: device to open 1276 * @dev: device to open
1277 * 1277 *
1278 * Takes a device from down to up state. The device's private open 1278 * Takes a device from down to up state. The device's private open
1279 * function is invoked and then the multicast lists are loaded. Finally 1279 * function is invoked and then the multicast lists are loaded. Finally
1280 * the device is moved into the up state and a %NETDEV_UP message is 1280 * the device is moved into the up state and a %NETDEV_UP message is
1281 * sent to the netdev notifier chain. 1281 * sent to the netdev notifier chain.
1282 * 1282 *
1283 * Calling this function on an active interface is a nop. On a failure 1283 * Calling this function on an active interface is a nop. On a failure
1284 * a negative errno code is returned. 1284 * a negative errno code is returned.
1285 */ 1285 */
1286 int dev_open(struct net_device *dev) 1286 int dev_open(struct net_device *dev)
1287 { 1287 {
1288 int ret; 1288 int ret;
1289 1289
1290 if (dev->flags & IFF_UP) 1290 if (dev->flags & IFF_UP)
1291 return 0; 1291 return 0;
1292 1292
1293 ret = __dev_open(dev); 1293 ret = __dev_open(dev);
1294 if (ret < 0) 1294 if (ret < 0)
1295 return ret; 1295 return ret;
1296 1296
1297 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); 1297 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1298 call_netdevice_notifiers(NETDEV_UP, dev); 1298 call_netdevice_notifiers(NETDEV_UP, dev);
1299 1299
1300 return ret; 1300 return ret;
1301 } 1301 }
1302 EXPORT_SYMBOL(dev_open); 1302 EXPORT_SYMBOL(dev_open);
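/*
 * Editor's note: illustrative sketch, not part of this commit. dev_open() and
 * dev_close() are RTNL-protected operations (note the ASSERT_RTNL() in
 * __dev_open() above), so an in-kernel caller brackets them with rtnl_lock().
 */
static int example_bring_up(struct net_device *dev)
{
        int err;

        rtnl_lock();
        err = dev_open(dev);    /* a no-op returning 0 if the interface is already up */
        rtnl_unlock();

        return err;
}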
1303 1303
1304 static int __dev_close_many(struct list_head *head) 1304 static int __dev_close_many(struct list_head *head)
1305 { 1305 {
1306 struct net_device *dev; 1306 struct net_device *dev;
1307 1307
1308 ASSERT_RTNL(); 1308 ASSERT_RTNL();
1309 might_sleep(); 1309 might_sleep();
1310 1310
1311 list_for_each_entry(dev, head, close_list) { 1311 list_for_each_entry(dev, head, close_list) {
1312 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); 1312 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1313 1313
1314 clear_bit(__LINK_STATE_START, &dev->state); 1314 clear_bit(__LINK_STATE_START, &dev->state);
1315 1315
1316 /* Synchronize to scheduled poll. We cannot touch poll list, it 1316 /* Synchronize to scheduled poll. We cannot touch poll list, it
1317 * can be even on different cpu. So just clear netif_running(). 1317 * can be even on different cpu. So just clear netif_running().
1318 * 1318 *
1319 * dev->stop() will invoke napi_disable() on all of its 1319 * dev->stop() will invoke napi_disable() on all of its
1320 * napi_struct instances on this device. 1320 * napi_struct instances on this device.
1321 */ 1321 */
1322 smp_mb__after_clear_bit(); /* Commit netif_running(). */ 1322 smp_mb__after_clear_bit(); /* Commit netif_running(). */
1323 } 1323 }
1324 1324
1325 dev_deactivate_many(head); 1325 dev_deactivate_many(head);
1326 1326
1327 list_for_each_entry(dev, head, close_list) { 1327 list_for_each_entry(dev, head, close_list) {
1328 const struct net_device_ops *ops = dev->netdev_ops; 1328 const struct net_device_ops *ops = dev->netdev_ops;
1329 1329
1330 /* 1330 /*
1331 * Call the device specific close. This cannot fail. 1331 * Call the device specific close. This cannot fail.
1332 * Only if device is UP 1332 * Only if device is UP
1333 * 1333 *
1334 * We allow it to be called even after a DETACH hot-plug 1334 * We allow it to be called even after a DETACH hot-plug
1335 * event. 1335 * event.
1336 */ 1336 */
1337 if (ops->ndo_stop) 1337 if (ops->ndo_stop)
1338 ops->ndo_stop(dev); 1338 ops->ndo_stop(dev);
1339 1339
1340 dev->flags &= ~IFF_UP; 1340 dev->flags &= ~IFF_UP;
1341 net_dmaengine_put(); 1341 net_dmaengine_put();
1342 } 1342 }
1343 1343
1344 return 0; 1344 return 0;
1345 } 1345 }
1346 1346
1347 static int __dev_close(struct net_device *dev) 1347 static int __dev_close(struct net_device *dev)
1348 { 1348 {
1349 int retval; 1349 int retval;
1350 LIST_HEAD(single); 1350 LIST_HEAD(single);
1351 1351
1352 /* Temporarily disable netpoll until the interface is down */ 1352 /* Temporarily disable netpoll until the interface is down */
1353 netpoll_rx_disable(dev); 1353 netpoll_rx_disable(dev);
1354 1354
1355 list_add(&dev->close_list, &single); 1355 list_add(&dev->close_list, &single);
1356 retval = __dev_close_many(&single); 1356 retval = __dev_close_many(&single);
1357 list_del(&single); 1357 list_del(&single);
1358 1358
1359 netpoll_rx_enable(dev); 1359 netpoll_rx_enable(dev);
1360 return retval; 1360 return retval;
1361 } 1361 }
1362 1362
1363 static int dev_close_many(struct list_head *head) 1363 static int dev_close_many(struct list_head *head)
1364 { 1364 {
1365 struct net_device *dev, *tmp; 1365 struct net_device *dev, *tmp;
1366 1366
1367 /* Remove the devices that don't need to be closed */ 1367 /* Remove the devices that don't need to be closed */
1368 list_for_each_entry_safe(dev, tmp, head, close_list) 1368 list_for_each_entry_safe(dev, tmp, head, close_list)
1369 if (!(dev->flags & IFF_UP)) 1369 if (!(dev->flags & IFF_UP))
1370 list_del_init(&dev->close_list); 1370 list_del_init(&dev->close_list);
1371 1371
1372 __dev_close_many(head); 1372 __dev_close_many(head);
1373 1373
1374 list_for_each_entry_safe(dev, tmp, head, close_list) { 1374 list_for_each_entry_safe(dev, tmp, head, close_list) {
1375 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); 1375 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1376 call_netdevice_notifiers(NETDEV_DOWN, dev); 1376 call_netdevice_notifiers(NETDEV_DOWN, dev);
1377 list_del_init(&dev->close_list); 1377 list_del_init(&dev->close_list);
1378 } 1378 }
1379 1379
1380 return 0; 1380 return 0;
1381 } 1381 }
1382 1382
1383 /** 1383 /**
1384 * dev_close - shutdown an interface. 1384 * dev_close - shutdown an interface.
1385 * @dev: device to shutdown 1385 * @dev: device to shutdown
1386 * 1386 *
1387 * This function moves an active device into down state. A 1387 * This function moves an active device into down state. A
1388 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device 1388 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1389 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier 1389 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1390 * chain. 1390 * chain.
1391 */ 1391 */
1392 int dev_close(struct net_device *dev) 1392 int dev_close(struct net_device *dev)
1393 { 1393 {
1394 if (dev->flags & IFF_UP) { 1394 if (dev->flags & IFF_UP) {
1395 LIST_HEAD(single); 1395 LIST_HEAD(single);
1396 1396
1397 /* Block netpoll rx while the interface is going down */ 1397 /* Block netpoll rx while the interface is going down */
1398 netpoll_rx_disable(dev); 1398 netpoll_rx_disable(dev);
1399 1399
1400 list_add(&dev->close_list, &single); 1400 list_add(&dev->close_list, &single);
1401 dev_close_many(&single); 1401 dev_close_many(&single);
1402 list_del(&single); 1402 list_del(&single);
1403 1403
1404 netpoll_rx_enable(dev); 1404 netpoll_rx_enable(dev);
1405 } 1405 }
1406 return 0; 1406 return 0;
1407 } 1407 }
1408 EXPORT_SYMBOL(dev_close); 1408 EXPORT_SYMBOL(dev_close);
1409 1409
1410 1410
1411 /** 1411 /**
1412 * dev_disable_lro - disable Large Receive Offload on a device 1412 * dev_disable_lro - disable Large Receive Offload on a device
1413 * @dev: device 1413 * @dev: device
1414 * 1414 *
1415 * Disable Large Receive Offload (LRO) on a net device. Must be 1415 * Disable Large Receive Offload (LRO) on a net device. Must be
1416 * called under RTNL. This is needed if received packets may be 1416 * called under RTNL. This is needed if received packets may be
1417 * forwarded to another interface. 1417 * forwarded to another interface.
1418 */ 1418 */
1419 void dev_disable_lro(struct net_device *dev) 1419 void dev_disable_lro(struct net_device *dev)
1420 { 1420 {
1421 /* 1421 /*
1422 * If we're trying to disable lro on a vlan device 1422 * If we're trying to disable lro on a vlan device
1423 * use the underlying physical device instead 1423 * use the underlying physical device instead
1424 */ 1424 */
1425 if (is_vlan_dev(dev)) 1425 if (is_vlan_dev(dev))
1426 dev = vlan_dev_real_dev(dev); 1426 dev = vlan_dev_real_dev(dev);
1427 1427
1428 /* the same for macvlan devices */ 1428 /* the same for macvlan devices */
1429 if (netif_is_macvlan(dev)) 1429 if (netif_is_macvlan(dev))
1430 dev = macvlan_dev_real_dev(dev); 1430 dev = macvlan_dev_real_dev(dev);
1431 1431
1432 dev->wanted_features &= ~NETIF_F_LRO; 1432 dev->wanted_features &= ~NETIF_F_LRO;
1433 netdev_update_features(dev); 1433 netdev_update_features(dev);
1434 1434
1435 if (unlikely(dev->features & NETIF_F_LRO)) 1435 if (unlikely(dev->features & NETIF_F_LRO))
1436 netdev_WARN(dev, "failed to disable LRO!\n"); 1436 netdev_WARN(dev, "failed to disable LRO!\n");
1437 } 1437 }
1438 EXPORT_SYMBOL(dev_disable_lro); 1438 EXPORT_SYMBOL(dev_disable_lro);
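/*
 * Editor's note: illustrative sketch, not part of this commit. Per the
 * kernel-doc above, LRO must be switched off under RTNL when received
 * packets may be forwarded; enabling forwarding on the device is the
 * assumed trigger here.
 */
static void example_prepare_for_forwarding(struct net_device *dev)
{
        ASSERT_RTNL();                  /* dev_disable_lro() must run under RTNL */
        dev_disable_lro(dev);           /* also resolves vlan/macvlan upper devices */
}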
1439 1439
1440 static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, 1440 static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
1441 struct net_device *dev) 1441 struct net_device *dev)
1442 { 1442 {
1443 struct netdev_notifier_info info; 1443 struct netdev_notifier_info info;
1444 1444
1445 netdev_notifier_info_init(&info, dev); 1445 netdev_notifier_info_init(&info, dev);
1446 return nb->notifier_call(nb, val, &info); 1446 return nb->notifier_call(nb, val, &info);
1447 } 1447 }
1448 1448
1449 static int dev_boot_phase = 1; 1449 static int dev_boot_phase = 1;
1450 1450
1451 /** 1451 /**
1452 * register_netdevice_notifier - register a network notifier block 1452 * register_netdevice_notifier - register a network notifier block
1453 * @nb: notifier 1453 * @nb: notifier
1454 * 1454 *
1455 * Register a notifier to be called when network device events occur. 1455 * Register a notifier to be called when network device events occur.
1456 * The notifier passed is linked into the kernel structures and must 1456 * The notifier passed is linked into the kernel structures and must
1457 * not be reused until it has been unregistered. A negative errno code 1457 * not be reused until it has been unregistered. A negative errno code
1458 * is returned on a failure. 1458 * is returned on a failure.
1459 * 1459 *
1460 * When registered all registration and up events are replayed 1460 * When registered all registration and up events are replayed
1461 * to the new notifier to allow the device to have a race-free 1461 * to the new notifier to allow the device to have a race-free
1462 * view of the network device list. 1462 * view of the network device list.
1463 */ 1463 */
1464 1464
1465 int register_netdevice_notifier(struct notifier_block *nb) 1465 int register_netdevice_notifier(struct notifier_block *nb)
1466 { 1466 {
1467 struct net_device *dev; 1467 struct net_device *dev;
1468 struct net_device *last; 1468 struct net_device *last;
1469 struct net *net; 1469 struct net *net;
1470 int err; 1470 int err;
1471 1471
1472 rtnl_lock(); 1472 rtnl_lock();
1473 err = raw_notifier_chain_register(&netdev_chain, nb); 1473 err = raw_notifier_chain_register(&netdev_chain, nb);
1474 if (err) 1474 if (err)
1475 goto unlock; 1475 goto unlock;
1476 if (dev_boot_phase) 1476 if (dev_boot_phase)
1477 goto unlock; 1477 goto unlock;
1478 for_each_net(net) { 1478 for_each_net(net) {
1479 for_each_netdev(net, dev) { 1479 for_each_netdev(net, dev) {
1480 err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); 1480 err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
1481 err = notifier_to_errno(err); 1481 err = notifier_to_errno(err);
1482 if (err) 1482 if (err)
1483 goto rollback; 1483 goto rollback;
1484 1484
1485 if (!(dev->flags & IFF_UP)) 1485 if (!(dev->flags & IFF_UP))
1486 continue; 1486 continue;
1487 1487
1488 call_netdevice_notifier(nb, NETDEV_UP, dev); 1488 call_netdevice_notifier(nb, NETDEV_UP, dev);
1489 } 1489 }
1490 } 1490 }
1491 1491
1492 unlock: 1492 unlock:
1493 rtnl_unlock(); 1493 rtnl_unlock();
1494 return err; 1494 return err;
1495 1495
1496 rollback: 1496 rollback:
1497 last = dev; 1497 last = dev;
1498 for_each_net(net) { 1498 for_each_net(net) {
1499 for_each_netdev(net, dev) { 1499 for_each_netdev(net, dev) {
1500 if (dev == last) 1500 if (dev == last)
1501 goto outroll; 1501 goto outroll;
1502 1502
1503 if (dev->flags & IFF_UP) { 1503 if (dev->flags & IFF_UP) {
1504 call_netdevice_notifier(nb, NETDEV_GOING_DOWN, 1504 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1505 dev); 1505 dev);
1506 call_netdevice_notifier(nb, NETDEV_DOWN, dev); 1506 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1507 } 1507 }
1508 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); 1508 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1509 } 1509 }
1510 } 1510 }
1511 1511
1512 outroll: 1512 outroll:
1513 raw_notifier_chain_unregister(&netdev_chain, nb); 1513 raw_notifier_chain_unregister(&netdev_chain, nb);
1514 goto unlock; 1514 goto unlock;
1515 } 1515 }
1516 EXPORT_SYMBOL(register_netdevice_notifier); 1516 EXPORT_SYMBOL(register_netdevice_notifier);
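/*
 * Editor's note: illustrative sketch, not part of this commit. It shows a
 * minimal notifier block. Registration replays NETDEV_REGISTER and NETDEV_UP
 * for already-existing devices, as the kernel-doc above states, and the
 * netdev_notifier_info_to_dev() accessor matches the info structure passed
 * by call_netdevice_notifier() in this file.
 */
static int example_netdev_event(struct notifier_block *nb,
                                unsigned long event, void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);

        if (event == NETDEV_UP)
                pr_info("%s is up\n", dev->name);
        return NOTIFY_DONE;
}

static struct notifier_block example_nb = {
        .notifier_call = example_netdev_event,
};

/* register_netdevice_notifier(&example_nb) from module init,
 * unregister_netdevice_notifier(&example_nb) from module exit.
 */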
1517 1517
1518 /** 1518 /**
1519 * unregister_netdevice_notifier - unregister a network notifier block 1519 * unregister_netdevice_notifier - unregister a network notifier block
1520 * @nb: notifier 1520 * @nb: notifier
1521 * 1521 *
1522 * Unregister a notifier previously registered by 1522 * Unregister a notifier previously registered by
1523 * register_netdevice_notifier(). The notifier is unlinked from the 1523 * register_netdevice_notifier(). The notifier is unlinked from the
1524 * kernel structures and may then be reused. A negative errno code 1524 * kernel structures and may then be reused. A negative errno code
1525 * is returned on a failure. 1525 * is returned on a failure.
1526 * 1526 *
1527 * After unregistering unregister and down device events are synthesized 1527 * After unregistering unregister and down device events are synthesized
1528 * for all devices on the device list to the removed notifier to remove 1528 * for all devices on the device list to the removed notifier to remove
1529 * the need for special case cleanup code. 1529 * the need for special case cleanup code.
1530 */ 1530 */
1531 1531
1532 int unregister_netdevice_notifier(struct notifier_block *nb) 1532 int unregister_netdevice_notifier(struct notifier_block *nb)
1533 { 1533 {
1534 struct net_device *dev; 1534 struct net_device *dev;
1535 struct net *net; 1535 struct net *net;
1536 int err; 1536 int err;
1537 1537
1538 rtnl_lock(); 1538 rtnl_lock();
1539 err = raw_notifier_chain_unregister(&netdev_chain, nb); 1539 err = raw_notifier_chain_unregister(&netdev_chain, nb);
1540 if (err) 1540 if (err)
1541 goto unlock; 1541 goto unlock;
1542 1542
1543 for_each_net(net) { 1543 for_each_net(net) {
1544 for_each_netdev(net, dev) { 1544 for_each_netdev(net, dev) {
1545 if (dev->flags & IFF_UP) { 1545 if (dev->flags & IFF_UP) {
1546 call_netdevice_notifier(nb, NETDEV_GOING_DOWN, 1546 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1547 dev); 1547 dev);
1548 call_netdevice_notifier(nb, NETDEV_DOWN, dev); 1548 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1549 } 1549 }
1550 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); 1550 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1551 } 1551 }
1552 } 1552 }
1553 unlock: 1553 unlock:
1554 rtnl_unlock(); 1554 rtnl_unlock();
1555 return err; 1555 return err;
1556 } 1556 }
1557 EXPORT_SYMBOL(unregister_netdevice_notifier); 1557 EXPORT_SYMBOL(unregister_netdevice_notifier);
1558 1558
1559 /** 1559 /**
1560 * call_netdevice_notifiers_info - call all network notifier blocks 1560 * call_netdevice_notifiers_info - call all network notifier blocks
1561 * @val: value passed unmodified to notifier function 1561 * @val: value passed unmodified to notifier function
1562 * @dev: net_device pointer passed unmodified to notifier function 1562 * @dev: net_device pointer passed unmodified to notifier function
1563 * @info: notifier information data 1563 * @info: notifier information data
1564 * 1564 *
1565 * Call all network notifier blocks. Parameters and return value 1565 * Call all network notifier blocks. Parameters and return value
1566 * are as for raw_notifier_call_chain(). 1566 * are as for raw_notifier_call_chain().
1567 */ 1567 */
1568 1568
1569 int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, 1569 int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev,
1570 struct netdev_notifier_info *info) 1570 struct netdev_notifier_info *info)
1571 { 1571 {
1572 ASSERT_RTNL(); 1572 ASSERT_RTNL();
1573 netdev_notifier_info_init(info, dev); 1573 netdev_notifier_info_init(info, dev);
1574 return raw_notifier_call_chain(&netdev_chain, val, info); 1574 return raw_notifier_call_chain(&netdev_chain, val, info);
1575 } 1575 }
1576 EXPORT_SYMBOL(call_netdevice_notifiers_info); 1576 EXPORT_SYMBOL(call_netdevice_notifiers_info);
1577 1577
1578 /** 1578 /**
1579 * call_netdevice_notifiers - call all network notifier blocks 1579 * call_netdevice_notifiers - call all network notifier blocks
1580 * @val: value passed unmodified to notifier function 1580 * @val: value passed unmodified to notifier function
1581 * @dev: net_device pointer passed unmodified to notifier function 1581 * @dev: net_device pointer passed unmodified to notifier function
1582 * 1582 *
1583 * Call all network notifier blocks. Parameters and return value 1583 * Call all network notifier blocks. Parameters and return value
1584 * are as for raw_notifier_call_chain(). 1584 * are as for raw_notifier_call_chain().
1585 */ 1585 */
1586 1586
1587 int call_netdevice_notifiers(unsigned long val, struct net_device *dev) 1587 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1588 { 1588 {
1589 struct netdev_notifier_info info; 1589 struct netdev_notifier_info info;
1590 1590
1591 return call_netdevice_notifiers_info(val, dev, &info); 1591 return call_netdevice_notifiers_info(val, dev, &info);
1592 } 1592 }
1593 EXPORT_SYMBOL(call_netdevice_notifiers); 1593 EXPORT_SYMBOL(call_netdevice_notifiers);
1594 1594
1595 static struct static_key netstamp_needed __read_mostly; 1595 static struct static_key netstamp_needed __read_mostly;
1596 #ifdef HAVE_JUMP_LABEL 1596 #ifdef HAVE_JUMP_LABEL
1597 /* We are not allowed to call static_key_slow_dec() from irq context 1597 /* We are not allowed to call static_key_slow_dec() from irq context
1598 * If net_disable_timestamp() is called from irq context, defer the 1598 * If net_disable_timestamp() is called from irq context, defer the
1599 * static_key_slow_dec() calls. 1599 * static_key_slow_dec() calls.
1600 */ 1600 */
1601 static atomic_t netstamp_needed_deferred; 1601 static atomic_t netstamp_needed_deferred;
1602 #endif 1602 #endif
1603 1603
1604 void net_enable_timestamp(void) 1604 void net_enable_timestamp(void)
1605 { 1605 {
1606 #ifdef HAVE_JUMP_LABEL 1606 #ifdef HAVE_JUMP_LABEL
1607 int deferred = atomic_xchg(&netstamp_needed_deferred, 0); 1607 int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
1608 1608
1609 if (deferred) { 1609 if (deferred) {
1610 while (--deferred) 1610 while (--deferred)
1611 static_key_slow_dec(&netstamp_needed); 1611 static_key_slow_dec(&netstamp_needed);
1612 return; 1612 return;
1613 } 1613 }
1614 #endif 1614 #endif
1615 static_key_slow_inc(&netstamp_needed); 1615 static_key_slow_inc(&netstamp_needed);
1616 } 1616 }
1617 EXPORT_SYMBOL(net_enable_timestamp); 1617 EXPORT_SYMBOL(net_enable_timestamp);
1618 1618
1619 void net_disable_timestamp(void) 1619 void net_disable_timestamp(void)
1620 { 1620 {
1621 #ifdef HAVE_JUMP_LABEL 1621 #ifdef HAVE_JUMP_LABEL
1622 if (in_interrupt()) { 1622 if (in_interrupt()) {
1623 atomic_inc(&netstamp_needed_deferred); 1623 atomic_inc(&netstamp_needed_deferred);
1624 return; 1624 return;
1625 } 1625 }
1626 #endif 1626 #endif
1627 static_key_slow_dec(&netstamp_needed); 1627 static_key_slow_dec(&netstamp_needed);
1628 } 1628 }
1629 EXPORT_SYMBOL(net_disable_timestamp); 1629 EXPORT_SYMBOL(net_disable_timestamp);
1630 1630
1631 static inline void net_timestamp_set(struct sk_buff *skb) 1631 static inline void net_timestamp_set(struct sk_buff *skb)
1632 { 1632 {
1633 skb->tstamp.tv64 = 0; 1633 skb->tstamp.tv64 = 0;
1634 if (static_key_false(&netstamp_needed)) 1634 if (static_key_false(&netstamp_needed))
1635 __net_timestamp(skb); 1635 __net_timestamp(skb);
1636 } 1636 }
1637 1637
1638 #define net_timestamp_check(COND, SKB) \ 1638 #define net_timestamp_check(COND, SKB) \
1639 if (static_key_false(&netstamp_needed)) { \ 1639 if (static_key_false(&netstamp_needed)) { \
1640 if ((COND) && !(SKB)->tstamp.tv64) \ 1640 if ((COND) && !(SKB)->tstamp.tv64) \
1641 __net_timestamp(SKB); \ 1641 __net_timestamp(SKB); \
1642 } \ 1642 } \
1643 1643
1644 static inline bool is_skb_forwardable(struct net_device *dev, 1644 static inline bool is_skb_forwardable(struct net_device *dev,
1645 struct sk_buff *skb) 1645 struct sk_buff *skb)
1646 { 1646 {
1647 unsigned int len; 1647 unsigned int len;
1648 1648
1649 if (!(dev->flags & IFF_UP)) 1649 if (!(dev->flags & IFF_UP))
1650 return false; 1650 return false;
1651 1651
1652 len = dev->mtu + dev->hard_header_len + VLAN_HLEN; 1652 len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
1653 if (skb->len <= len) 1653 if (skb->len <= len)
1654 return true; 1654 return true;
1655 1655
1656 /* if TSO is enabled, we don't care about the length as the packet 1656 /* if TSO is enabled, we don't care about the length as the packet
1657 * could be forwarded without being segmented before 1657 * could be forwarded without being segmented before
1658 */ 1658 */
1659 if (skb_is_gso(skb)) 1659 if (skb_is_gso(skb))
1660 return true; 1660 return true;
1661 1661
1662 return false; 1662 return false;
1663 } 1663 }
1664 1664
1665 /** 1665 /**
1666 * dev_forward_skb - loopback an skb to another netif 1666 * dev_forward_skb - loopback an skb to another netif
1667 * 1667 *
1668 * @dev: destination network device 1668 * @dev: destination network device
1669 * @skb: buffer to forward 1669 * @skb: buffer to forward
1670 * 1670 *
1671 * return values: 1671 * return values:
1672 * NET_RX_SUCCESS (no congestion) 1672 * NET_RX_SUCCESS (no congestion)
1673 * NET_RX_DROP (packet was dropped, but freed) 1673 * NET_RX_DROP (packet was dropped, but freed)
1674 * 1674 *
1675 * dev_forward_skb can be used for injecting an skb from the 1675 * dev_forward_skb can be used for injecting an skb from the
1676 * start_xmit function of one device into the receive queue 1676 * start_xmit function of one device into the receive queue
1677 * of another device. 1677 * of another device.
1678 * 1678 *
1679 * The receiving device may be in another namespace, so 1679 * The receiving device may be in another namespace, so
1680 * we have to clear all information in the skb that could 1680 * we have to clear all information in the skb that could
1681 * impact namespace isolation. 1681 * impact namespace isolation.
1682 */ 1682 */
1683 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) 1683 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1684 { 1684 {
1685 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { 1685 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
1686 if (skb_copy_ubufs(skb, GFP_ATOMIC)) { 1686 if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
1687 atomic_long_inc(&dev->rx_dropped); 1687 atomic_long_inc(&dev->rx_dropped);
1688 kfree_skb(skb); 1688 kfree_skb(skb);
1689 return NET_RX_DROP; 1689 return NET_RX_DROP;
1690 } 1690 }
1691 } 1691 }
1692 1692
1693 if (unlikely(!is_skb_forwardable(dev, skb))) { 1693 if (unlikely(!is_skb_forwardable(dev, skb))) {
1694 atomic_long_inc(&dev->rx_dropped); 1694 atomic_long_inc(&dev->rx_dropped);
1695 kfree_skb(skb); 1695 kfree_skb(skb);
1696 return NET_RX_DROP; 1696 return NET_RX_DROP;
1697 } 1697 }
1698 1698
1699 skb_scrub_packet(skb, true); 1699 skb_scrub_packet(skb, true);
1700 skb->protocol = eth_type_trans(skb, dev); 1700 skb->protocol = eth_type_trans(skb, dev);
1701 1701
1702 return netif_rx(skb); 1702 return netif_rx(skb);
1703 } 1703 }
1704 EXPORT_SYMBOL_GPL(dev_forward_skb); 1704 EXPORT_SYMBOL_GPL(dev_forward_skb);
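/*
 * Editor's note: illustrative sketch, not part of this commit. This is the
 * pattern the kernel-doc above describes: a virtual driver's ndo_start_xmit
 * injects the skb into its peer's receive path. example_get_peer() is a
 * hypothetical helper standing in for however the driver tracks its peer
 * (veth, for instance, keeps it in its private data).
 */
static netdev_tx_t example_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct net_device *peer = example_get_peer(dev);        /* hypothetical */
        unsigned int len = skb->len;    /* sample before the skb is handed off */

        if (dev_forward_skb(peer, skb) == NET_RX_SUCCESS) {
                dev->stats.tx_packets++;
                dev->stats.tx_bytes += len;
        }
        /* on NET_RX_DROP dev_forward_skb() has already freed the skb */
        return NETDEV_TX_OK;
}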
1705 1705
1706 static inline int deliver_skb(struct sk_buff *skb, 1706 static inline int deliver_skb(struct sk_buff *skb,
1707 struct packet_type *pt_prev, 1707 struct packet_type *pt_prev,
1708 struct net_device *orig_dev) 1708 struct net_device *orig_dev)
1709 { 1709 {
1710 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) 1710 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
1711 return -ENOMEM; 1711 return -ENOMEM;
1712 atomic_inc(&skb->users); 1712 atomic_inc(&skb->users);
1713 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 1713 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1714 } 1714 }
1715 1715
1716 static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) 1716 static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
1717 { 1717 {
1718 if (!ptype->af_packet_priv || !skb->sk) 1718 if (!ptype->af_packet_priv || !skb->sk)
1719 return false; 1719 return false;
1720 1720
1721 if (ptype->id_match) 1721 if (ptype->id_match)
1722 return ptype->id_match(ptype, skb->sk); 1722 return ptype->id_match(ptype, skb->sk);
1723 else if ((struct sock *)ptype->af_packet_priv == skb->sk) 1723 else if ((struct sock *)ptype->af_packet_priv == skb->sk)
1724 return true; 1724 return true;
1725 1725
1726 return false; 1726 return false;
1727 } 1727 }
1728 1728
1729 /* 1729 /*
1730 * Support routine. Sends outgoing frames to any network 1730 * Support routine. Sends outgoing frames to any network
1731 * taps currently in use. 1731 * taps currently in use.
1732 */ 1732 */
1733 1733
1734 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) 1734 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1735 { 1735 {
1736 struct packet_type *ptype; 1736 struct packet_type *ptype;
1737 struct sk_buff *skb2 = NULL; 1737 struct sk_buff *skb2 = NULL;
1738 struct packet_type *pt_prev = NULL; 1738 struct packet_type *pt_prev = NULL;
1739 1739
1740 rcu_read_lock(); 1740 rcu_read_lock();
1741 list_for_each_entry_rcu(ptype, &ptype_all, list) { 1741 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1742 /* Never send packets back to the socket 1742 /* Never send packets back to the socket
1743 * they originated from - MvS (miquels@drinkel.ow.org) 1743 * they originated from - MvS (miquels@drinkel.ow.org)
1744 */ 1744 */
1745 if ((ptype->dev == dev || !ptype->dev) && 1745 if ((ptype->dev == dev || !ptype->dev) &&
1746 (!skb_loop_sk(ptype, skb))) { 1746 (!skb_loop_sk(ptype, skb))) {
1747 if (pt_prev) { 1747 if (pt_prev) {
1748 deliver_skb(skb2, pt_prev, skb->dev); 1748 deliver_skb(skb2, pt_prev, skb->dev);
1749 pt_prev = ptype; 1749 pt_prev = ptype;
1750 continue; 1750 continue;
1751 } 1751 }
1752 1752
1753 skb2 = skb_clone(skb, GFP_ATOMIC); 1753 skb2 = skb_clone(skb, GFP_ATOMIC);
1754 if (!skb2) 1754 if (!skb2)
1755 break; 1755 break;
1756 1756
1757 net_timestamp_set(skb2); 1757 net_timestamp_set(skb2);
1758 1758
1759 /* skb->nh should be correctly set 1759 /* skb->nh should be correctly set
1760 * by the sender, so that the check below is 1760 * by the sender, so that the check below is
1761 * just protection against buggy protocols. 1761 * just protection against buggy protocols.
1762 */ 1762 */
1763 skb_reset_mac_header(skb2); 1763 skb_reset_mac_header(skb2);
1764 1764
1765 if (skb_network_header(skb2) < skb2->data || 1765 if (skb_network_header(skb2) < skb2->data ||
1766 skb_network_header(skb2) > skb_tail_pointer(skb2)) { 1766 skb_network_header(skb2) > skb_tail_pointer(skb2)) {
1767 net_crit_ratelimited("protocol %04x is buggy, dev %s\n", 1767 net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
1768 ntohs(skb2->protocol), 1768 ntohs(skb2->protocol),
1769 dev->name); 1769 dev->name);
1770 skb_reset_network_header(skb2); 1770 skb_reset_network_header(skb2);
1771 } 1771 }
1772 1772
1773 skb2->transport_header = skb2->network_header; 1773 skb2->transport_header = skb2->network_header;
1774 skb2->pkt_type = PACKET_OUTGOING; 1774 skb2->pkt_type = PACKET_OUTGOING;
1775 pt_prev = ptype; 1775 pt_prev = ptype;
1776 } 1776 }
1777 } 1777 }
1778 if (pt_prev) 1778 if (pt_prev)
1779 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); 1779 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
1780 rcu_read_unlock(); 1780 rcu_read_unlock();
1781 } 1781 }
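For context, a minimal sketch (not part of this file or commit) of a tap that would receive the PACKET_OUTGOING clones produced above; the names sample_tap_rcv and sample_tap are hypothetical.

#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/skbuff.h>

/* Hypothetical ETH_P_ALL tap: registered on ptype_all, so it sees the
 * PACKET_OUTGOING clones handed out by dev_queue_xmit_nit(). */
static int sample_tap_rcv(struct sk_buff *skb, struct net_device *dev,
			  struct packet_type *pt, struct net_device *orig_dev)
{
	if (skb->pkt_type == PACKET_OUTGOING)
		pr_debug("tx tap: %s len=%u\n", dev->name, skb->len);
	kfree_skb(skb);
	return 0;
}

static struct packet_type sample_tap __read_mostly = {
	.type = htons(ETH_P_ALL),
	.func = sample_tap_rcv,
};

/* dev_add_pack(&sample_tap) on module init, dev_remove_pack(&sample_tap)
 * on exit. */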
1782 1782
1783 /** 1783 /**
1784 * netif_setup_tc - Handle tc mappings on real_num_tx_queues change 1784 * netif_setup_tc - Handle tc mappings on real_num_tx_queues change
1785 * @dev: Network device 1785 * @dev: Network device
1786 * @txq: number of queues available 1786 * @txq: number of queues available
1787 * 1787 *
1788 * If real_num_tx_queues is changed, the tc mappings may no longer be 1788 * If real_num_tx_queues is changed, the tc mappings may no longer be
1789 * valid. To resolve this, verify that the tc mapping remains valid and, 1789 * valid. To resolve this, verify that the tc mapping remains valid and,
1790 * if not, NULL the mapping. With no priorities mapping to this 1790 * if not, NULL the mapping. With no priorities mapping to this
1791 * offset/count pair it will no longer be used. In the worst case, if 1791 * offset/count pair it will no longer be used. In the worst case, if
1792 * TC0 is invalid, nothing can be done, so disable priority mappings. 1792 * TC0 is invalid, nothing can be done, so disable priority mappings.
1793 * It is expected that drivers will fix this mapping if they can before 1793 * It is expected that drivers will fix this mapping if they can before
1794 * calling netif_set_real_num_tx_queues. 1794 * calling netif_set_real_num_tx_queues.
1795 */ 1795 */
1796 static void netif_setup_tc(struct net_device *dev, unsigned int txq) 1796 static void netif_setup_tc(struct net_device *dev, unsigned int txq)
1797 { 1797 {
1798 int i; 1798 int i;
1799 struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; 1799 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
1800 1800
1801 /* If TC0 is invalidated disable TC mapping */ 1801 /* If TC0 is invalidated disable TC mapping */
1802 if (tc->offset + tc->count > txq) { 1802 if (tc->offset + tc->count > txq) {
1803 pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n"); 1803 pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
1804 dev->num_tc = 0; 1804 dev->num_tc = 0;
1805 return; 1805 return;
1806 } 1806 }
1807 1807
1808 /* Invalidated prio to tc mappings set to TC0 */ 1808 /* Invalidated prio to tc mappings set to TC0 */
1809 for (i = 1; i < TC_BITMASK + 1; i++) { 1809 for (i = 1; i < TC_BITMASK + 1; i++) {
1810 int q = netdev_get_prio_tc_map(dev, i); 1810 int q = netdev_get_prio_tc_map(dev, i);
1811 1811
1812 tc = &dev->tc_to_txq[q]; 1812 tc = &dev->tc_to_txq[q];
1813 if (tc->offset + tc->count > txq) { 1813 if (tc->offset + tc->count > txq) {
1814 pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n", 1814 pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
1815 i, q); 1815 i, q);
1816 netdev_set_prio_tc_map(dev, i, 0); 1816 netdev_set_prio_tc_map(dev, i, 0);
1817 } 1817 }
1818 } 1818 }
1819 } 1819 }
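A hedged illustration of the driver-side setup this helper protects, with assumed names (sample_setup_tc) and an assumed 8-queue device: the driver partitions its tx queues into two traffic classes before setting real_num_tx_queues.

#include <linux/netdevice.h>

/* Hypothetical: 2 traffic classes over 8 tx queues; priorities 0-3 map
 * to TC0, the rest to TC1. Shrinking the queue count later triggers
 * netif_setup_tc() above to repair or disable these mappings. */
static int sample_setup_tc(struct net_device *dev)
{
	int prio;

	netdev_set_num_tc(dev, 2);
	netdev_set_tc_queue(dev, 0, 4, 0);	/* TC0: queues 0-3 */
	netdev_set_tc_queue(dev, 1, 4, 4);	/* TC1: queues 4-7 */
	for (prio = 0; prio <= TC_BITMASK; prio++)
		netdev_set_prio_tc_map(dev, prio, prio < 4 ? 0 : 1);

	return netif_set_real_num_tx_queues(dev, 8);
}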
1820 1820
1821 #ifdef CONFIG_XPS 1821 #ifdef CONFIG_XPS
1822 static DEFINE_MUTEX(xps_map_mutex); 1822 static DEFINE_MUTEX(xps_map_mutex);
1823 #define xmap_dereference(P) \ 1823 #define xmap_dereference(P) \
1824 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) 1824 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
1825 1825
1826 static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps, 1826 static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
1827 int cpu, u16 index) 1827 int cpu, u16 index)
1828 { 1828 {
1829 struct xps_map *map = NULL; 1829 struct xps_map *map = NULL;
1830 int pos; 1830 int pos;
1831 1831
1832 if (dev_maps) 1832 if (dev_maps)
1833 map = xmap_dereference(dev_maps->cpu_map[cpu]); 1833 map = xmap_dereference(dev_maps->cpu_map[cpu]);
1834 1834
1835 for (pos = 0; map && pos < map->len; pos++) { 1835 for (pos = 0; map && pos < map->len; pos++) {
1836 if (map->queues[pos] == index) { 1836 if (map->queues[pos] == index) {
1837 if (map->len > 1) { 1837 if (map->len > 1) {
1838 map->queues[pos] = map->queues[--map->len]; 1838 map->queues[pos] = map->queues[--map->len];
1839 } else { 1839 } else {
1840 RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL); 1840 RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
1841 kfree_rcu(map, rcu); 1841 kfree_rcu(map, rcu);
1842 map = NULL; 1842 map = NULL;
1843 } 1843 }
1844 break; 1844 break;
1845 } 1845 }
1846 } 1846 }
1847 1847
1848 return map; 1848 return map;
1849 } 1849 }
1850 1850
1851 static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) 1851 static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
1852 { 1852 {
1853 struct xps_dev_maps *dev_maps; 1853 struct xps_dev_maps *dev_maps;
1854 int cpu, i; 1854 int cpu, i;
1855 bool active = false; 1855 bool active = false;
1856 1856
1857 mutex_lock(&xps_map_mutex); 1857 mutex_lock(&xps_map_mutex);
1858 dev_maps = xmap_dereference(dev->xps_maps); 1858 dev_maps = xmap_dereference(dev->xps_maps);
1859 1859
1860 if (!dev_maps) 1860 if (!dev_maps)
1861 goto out_no_maps; 1861 goto out_no_maps;
1862 1862
1863 for_each_possible_cpu(cpu) { 1863 for_each_possible_cpu(cpu) {
1864 for (i = index; i < dev->num_tx_queues; i++) { 1864 for (i = index; i < dev->num_tx_queues; i++) {
1865 if (!remove_xps_queue(dev_maps, cpu, i)) 1865 if (!remove_xps_queue(dev_maps, cpu, i))
1866 break; 1866 break;
1867 } 1867 }
1868 if (i == dev->num_tx_queues) 1868 if (i == dev->num_tx_queues)
1869 active = true; 1869 active = true;
1870 } 1870 }
1871 1871
1872 if (!active) { 1872 if (!active) {
1873 RCU_INIT_POINTER(dev->xps_maps, NULL); 1873 RCU_INIT_POINTER(dev->xps_maps, NULL);
1874 kfree_rcu(dev_maps, rcu); 1874 kfree_rcu(dev_maps, rcu);
1875 } 1875 }
1876 1876
1877 for (i = index; i < dev->num_tx_queues; i++) 1877 for (i = index; i < dev->num_tx_queues; i++)
1878 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i), 1878 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
1879 NUMA_NO_NODE); 1879 NUMA_NO_NODE);
1880 1880
1881 out_no_maps: 1881 out_no_maps:
1882 mutex_unlock(&xps_map_mutex); 1882 mutex_unlock(&xps_map_mutex);
1883 } 1883 }
1884 1884
1885 static struct xps_map *expand_xps_map(struct xps_map *map, 1885 static struct xps_map *expand_xps_map(struct xps_map *map,
1886 int cpu, u16 index) 1886 int cpu, u16 index)
1887 { 1887 {
1888 struct xps_map *new_map; 1888 struct xps_map *new_map;
1889 int alloc_len = XPS_MIN_MAP_ALLOC; 1889 int alloc_len = XPS_MIN_MAP_ALLOC;
1890 int i, pos; 1890 int i, pos;
1891 1891
1892 for (pos = 0; map && pos < map->len; pos++) { 1892 for (pos = 0; map && pos < map->len; pos++) {
1893 if (map->queues[pos] != index) 1893 if (map->queues[pos] != index)
1894 continue; 1894 continue;
1895 return map; 1895 return map;
1896 } 1896 }
1897 1897
1898 /* Need to add queue to this CPU's existing map */ 1898 /* Need to add queue to this CPU's existing map */
1899 if (map) { 1899 if (map) {
1900 if (pos < map->alloc_len) 1900 if (pos < map->alloc_len)
1901 return map; 1901 return map;
1902 1902
1903 alloc_len = map->alloc_len * 2; 1903 alloc_len = map->alloc_len * 2;
1904 } 1904 }
1905 1905
1906 /* Need to allocate new map to store queue on this CPU's map */ 1906 /* Need to allocate new map to store queue on this CPU's map */
1907 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL, 1907 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
1908 cpu_to_node(cpu)); 1908 cpu_to_node(cpu));
1909 if (!new_map) 1909 if (!new_map)
1910 return NULL; 1910 return NULL;
1911 1911
1912 for (i = 0; i < pos; i++) 1912 for (i = 0; i < pos; i++)
1913 new_map->queues[i] = map->queues[i]; 1913 new_map->queues[i] = map->queues[i];
1914 new_map->alloc_len = alloc_len; 1914 new_map->alloc_len = alloc_len;
1915 new_map->len = pos; 1915 new_map->len = pos;
1916 1916
1917 return new_map; 1917 return new_map;
1918 } 1918 }
1919 1919
1920 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, 1920 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
1921 u16 index) 1921 u16 index)
1922 { 1922 {
1923 struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; 1923 struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
1924 struct xps_map *map, *new_map; 1924 struct xps_map *map, *new_map;
1925 int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES); 1925 int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
1926 int cpu, numa_node_id = -2; 1926 int cpu, numa_node_id = -2;
1927 bool active = false; 1927 bool active = false;
1928 1928
1929 mutex_lock(&xps_map_mutex); 1929 mutex_lock(&xps_map_mutex);
1930 1930
1931 dev_maps = xmap_dereference(dev->xps_maps); 1931 dev_maps = xmap_dereference(dev->xps_maps);
1932 1932
1933 /* allocate memory for queue storage */ 1933 /* allocate memory for queue storage */
1934 for_each_online_cpu(cpu) { 1934 for_each_online_cpu(cpu) {
1935 if (!cpumask_test_cpu(cpu, mask)) 1935 if (!cpumask_test_cpu(cpu, mask))
1936 continue; 1936 continue;
1937 1937
1938 if (!new_dev_maps) 1938 if (!new_dev_maps)
1939 new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); 1939 new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
1940 if (!new_dev_maps) { 1940 if (!new_dev_maps) {
1941 mutex_unlock(&xps_map_mutex); 1941 mutex_unlock(&xps_map_mutex);
1942 return -ENOMEM; 1942 return -ENOMEM;
1943 } 1943 }
1944 1944
1945 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : 1945 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
1946 NULL; 1946 NULL;
1947 1947
1948 map = expand_xps_map(map, cpu, index); 1948 map = expand_xps_map(map, cpu, index);
1949 if (!map) 1949 if (!map)
1950 goto error; 1950 goto error;
1951 1951
1952 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); 1952 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
1953 } 1953 }
1954 1954
1955 if (!new_dev_maps) 1955 if (!new_dev_maps)
1956 goto out_no_new_maps; 1956 goto out_no_new_maps;
1957 1957
1958 for_each_possible_cpu(cpu) { 1958 for_each_possible_cpu(cpu) {
1959 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) { 1959 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
1960 /* add queue to CPU maps */ 1960 /* add queue to CPU maps */
1961 int pos = 0; 1961 int pos = 0;
1962 1962
1963 map = xmap_dereference(new_dev_maps->cpu_map[cpu]); 1963 map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
1964 while ((pos < map->len) && (map->queues[pos] != index)) 1964 while ((pos < map->len) && (map->queues[pos] != index))
1965 pos++; 1965 pos++;
1966 1966
1967 if (pos == map->len) 1967 if (pos == map->len)
1968 map->queues[map->len++] = index; 1968 map->queues[map->len++] = index;
1969 #ifdef CONFIG_NUMA 1969 #ifdef CONFIG_NUMA
1970 if (numa_node_id == -2) 1970 if (numa_node_id == -2)
1971 numa_node_id = cpu_to_node(cpu); 1971 numa_node_id = cpu_to_node(cpu);
1972 else if (numa_node_id != cpu_to_node(cpu)) 1972 else if (numa_node_id != cpu_to_node(cpu))
1973 numa_node_id = -1; 1973 numa_node_id = -1;
1974 #endif 1974 #endif
1975 } else if (dev_maps) { 1975 } else if (dev_maps) {
1976 /* fill in the new device map from the old device map */ 1976 /* fill in the new device map from the old device map */
1977 map = xmap_dereference(dev_maps->cpu_map[cpu]); 1977 map = xmap_dereference(dev_maps->cpu_map[cpu]);
1978 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); 1978 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
1979 } 1979 }
1980 1980
1981 } 1981 }
1982 1982
1983 rcu_assign_pointer(dev->xps_maps, new_dev_maps); 1983 rcu_assign_pointer(dev->xps_maps, new_dev_maps);
1984 1984
1985 /* Cleanup old maps */ 1985 /* Cleanup old maps */
1986 if (dev_maps) { 1986 if (dev_maps) {
1987 for_each_possible_cpu(cpu) { 1987 for_each_possible_cpu(cpu) {
1988 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); 1988 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
1989 map = xmap_dereference(dev_maps->cpu_map[cpu]); 1989 map = xmap_dereference(dev_maps->cpu_map[cpu]);
1990 if (map && map != new_map) 1990 if (map && map != new_map)
1991 kfree_rcu(map, rcu); 1991 kfree_rcu(map, rcu);
1992 } 1992 }
1993 1993
1994 kfree_rcu(dev_maps, rcu); 1994 kfree_rcu(dev_maps, rcu);
1995 } 1995 }
1996 1996
1997 dev_maps = new_dev_maps; 1997 dev_maps = new_dev_maps;
1998 active = true; 1998 active = true;
1999 1999
2000 out_no_new_maps: 2000 out_no_new_maps:
2001 /* update Tx queue numa node */ 2001 /* update Tx queue numa node */
2002 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), 2002 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
2003 (numa_node_id >= 0) ? numa_node_id : 2003 (numa_node_id >= 0) ? numa_node_id :
2004 NUMA_NO_NODE); 2004 NUMA_NO_NODE);
2005 2005
2006 if (!dev_maps) 2006 if (!dev_maps)
2007 goto out_no_maps; 2007 goto out_no_maps;
2008 2008
2009 /* removes queue from unused CPUs */ 2009 /* removes queue from unused CPUs */
2010 for_each_possible_cpu(cpu) { 2010 for_each_possible_cpu(cpu) {
2011 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) 2011 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
2012 continue; 2012 continue;
2013 2013
2014 if (remove_xps_queue(dev_maps, cpu, index)) 2014 if (remove_xps_queue(dev_maps, cpu, index))
2015 active = true; 2015 active = true;
2016 } 2016 }
2017 2017
2018 /* free map if not active */ 2018 /* free map if not active */
2019 if (!active) { 2019 if (!active) {
2020 RCU_INIT_POINTER(dev->xps_maps, NULL); 2020 RCU_INIT_POINTER(dev->xps_maps, NULL);
2021 kfree_rcu(dev_maps, rcu); 2021 kfree_rcu(dev_maps, rcu);
2022 } 2022 }
2023 2023
2024 out_no_maps: 2024 out_no_maps:
2025 mutex_unlock(&xps_map_mutex); 2025 mutex_unlock(&xps_map_mutex);
2026 2026
2027 return 0; 2027 return 0;
2028 error: 2028 error:
2029 /* remove any maps that we added */ 2029 /* remove any maps that we added */
2030 for_each_possible_cpu(cpu) { 2030 for_each_possible_cpu(cpu) {
2031 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); 2031 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2032 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : 2032 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
2033 NULL; 2033 NULL;
2034 if (new_map && new_map != map) 2034 if (new_map && new_map != map)
2035 kfree(new_map); 2035 kfree(new_map);
2036 } 2036 }
2037 2037
2038 mutex_unlock(&xps_map_mutex); 2038 mutex_unlock(&xps_map_mutex);
2039 2039
2040 kfree(new_dev_maps); 2040 kfree(new_dev_maps);
2041 return -ENOMEM; 2041 return -ENOMEM;
2042 } 2042 }
2043 EXPORT_SYMBOL(netif_set_xps_queue); 2043 EXPORT_SYMBOL(netif_set_xps_queue);
2044 2044
2045 #endif 2045 #endif
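A minimal usage sketch for the exported netif_set_xps_queue() above, assuming a hypothetical driver helper sample_set_xps() that pins each tx queue to one online CPU; without CONFIG_XPS the call is a stub returning 0.

#include <linux/netdevice.h>
#include <linux/cpumask.h>

static void sample_set_xps(struct net_device *dev)
{
	u16 q = 0;
	int cpu, err;

	for_each_online_cpu(cpu) {
		if (q >= dev->real_num_tx_queues)
			break;
		err = netif_set_xps_queue(dev, cpumask_of(cpu), q);
		if (err)
			netdev_warn(dev, "XPS for queue %u failed: %d\n",
				    q, err);
		q++;
	}
}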
2046 /* 2046 /*
2047 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues 2047 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
2048 * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed. 2048 * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed.
2049 */ 2049 */
2050 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) 2050 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
2051 { 2051 {
2052 int rc; 2052 int rc;
2053 2053
2054 if (txq < 1 || txq > dev->num_tx_queues) 2054 if (txq < 1 || txq > dev->num_tx_queues)
2055 return -EINVAL; 2055 return -EINVAL;
2056 2056
2057 if (dev->reg_state == NETREG_REGISTERED || 2057 if (dev->reg_state == NETREG_REGISTERED ||
2058 dev->reg_state == NETREG_UNREGISTERING) { 2058 dev->reg_state == NETREG_UNREGISTERING) {
2059 ASSERT_RTNL(); 2059 ASSERT_RTNL();
2060 2060
2061 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, 2061 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
2062 txq); 2062 txq);
2063 if (rc) 2063 if (rc)
2064 return rc; 2064 return rc;
2065 2065
2066 if (dev->num_tc) 2066 if (dev->num_tc)
2067 netif_setup_tc(dev, txq); 2067 netif_setup_tc(dev, txq);
2068 2068
2069 if (txq < dev->real_num_tx_queues) { 2069 if (txq < dev->real_num_tx_queues) {
2070 qdisc_reset_all_tx_gt(dev, txq); 2070 qdisc_reset_all_tx_gt(dev, txq);
2071 #ifdef CONFIG_XPS 2071 #ifdef CONFIG_XPS
2072 netif_reset_xps_queues_gt(dev, txq); 2072 netif_reset_xps_queues_gt(dev, txq);
2073 #endif 2073 #endif
2074 } 2074 }
2075 } 2075 }
2076 2076
2077 dev->real_num_tx_queues = txq; 2077 dev->real_num_tx_queues = txq;
2078 return 0; 2078 return 0;
2079 } 2079 }
2080 EXPORT_SYMBOL(netif_set_real_num_tx_queues); 2080 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
2081 2081
2082 #ifdef CONFIG_RPS 2082 #ifdef CONFIG_RPS
2083 /** 2083 /**
2084 * netif_set_real_num_rx_queues - set actual number of RX queues used 2084 * netif_set_real_num_rx_queues - set actual number of RX queues used
2085 * @dev: Network device 2085 * @dev: Network device
2086 * @rxq: Actual number of RX queues 2086 * @rxq: Actual number of RX queues
2087 * 2087 *
2088 * This must be called either with the rtnl_lock held or before 2088 * This must be called either with the rtnl_lock held or before
2089 * registration of the net device. Returns 0 on success, or a 2089 * registration of the net device. Returns 0 on success, or a
2090 * negative error code. If called before registration, it always 2090 * negative error code. If called before registration, it always
2091 * succeeds. 2091 * succeeds.
2092 */ 2092 */
2093 int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) 2093 int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
2094 { 2094 {
2095 int rc; 2095 int rc;
2096 2096
2097 if (rxq < 1 || rxq > dev->num_rx_queues) 2097 if (rxq < 1 || rxq > dev->num_rx_queues)
2098 return -EINVAL; 2098 return -EINVAL;
2099 2099
2100 if (dev->reg_state == NETREG_REGISTERED) { 2100 if (dev->reg_state == NETREG_REGISTERED) {
2101 ASSERT_RTNL(); 2101 ASSERT_RTNL();
2102 2102
2103 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues, 2103 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
2104 rxq); 2104 rxq);
2105 if (rc) 2105 if (rc)
2106 return rc; 2106 return rc;
2107 } 2107 }
2108 2108
2109 dev->real_num_rx_queues = rxq; 2109 dev->real_num_rx_queues = rxq;
2110 return 0; 2110 return 0;
2111 } 2111 }
2112 EXPORT_SYMBOL(netif_set_real_num_rx_queues); 2112 EXPORT_SYMBOL(netif_set_real_num_rx_queues);
2113 #endif 2113 #endif
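A hedged sketch of a caller adjusting both queue counts together, e.g. from a channel-reconfiguration path; sample_set_channels() is a hypothetical name, and the rtnl requirement from the kernel-doc above is expressed with ASSERT_RTNL().

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int sample_set_channels(struct net_device *dev, unsigned int n)
{
	int err;

	ASSERT_RTNL();	/* required once the device is registered */

	err = netif_set_real_num_tx_queues(dev, n);
	if (err)
		return err;

	/* Without CONFIG_RPS this is a stub that returns 0. */
	return netif_set_real_num_rx_queues(dev, n);
}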
2114 2114
2115 /** 2115 /**
2116 * netif_get_num_default_rss_queues - default number of RSS queues 2116 * netif_get_num_default_rss_queues - default number of RSS queues
2117 * 2117 *
2118 * This routine should set an upper limit on the number of RSS queues 2118 * This routine should set an upper limit on the number of RSS queues
2119 * used by default by multiqueue devices. 2119 * used by default by multiqueue devices.
2120 */ 2120 */
2121 int netif_get_num_default_rss_queues(void) 2121 int netif_get_num_default_rss_queues(void)
2122 { 2122 {
2123 return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus()); 2123 return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
2124 } 2124 }
2125 EXPORT_SYMBOL(netif_get_num_default_rss_queues); 2125 EXPORT_SYMBOL(netif_get_num_default_rss_queues);
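For illustration only: a hypothetical driver capping its queue count with the helper above; the hardware maximum of 16 is an assumed value.

#include <linux/kernel.h>
#include <linux/netdevice.h>

#define SAMPLE_HW_MAX_QUEUES 16	/* assumed hardware limit */

static unsigned int sample_pick_num_queues(void)
{
	return min_t(unsigned int, SAMPLE_HW_MAX_QUEUES,
		     netif_get_num_default_rss_queues());
}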
2126 2126
2127 static inline void __netif_reschedule(struct Qdisc *q) 2127 static inline void __netif_reschedule(struct Qdisc *q)
2128 { 2128 {
2129 struct softnet_data *sd; 2129 struct softnet_data *sd;
2130 unsigned long flags; 2130 unsigned long flags;
2131 2131
2132 local_irq_save(flags); 2132 local_irq_save(flags);
2133 sd = &__get_cpu_var(softnet_data); 2133 sd = &__get_cpu_var(softnet_data);
2134 q->next_sched = NULL; 2134 q->next_sched = NULL;
2135 *sd->output_queue_tailp = q; 2135 *sd->output_queue_tailp = q;
2136 sd->output_queue_tailp = &q->next_sched; 2136 sd->output_queue_tailp = &q->next_sched;
2137 raise_softirq_irqoff(NET_TX_SOFTIRQ); 2137 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2138 local_irq_restore(flags); 2138 local_irq_restore(flags);
2139 } 2139 }
2140 2140
2141 void __netif_schedule(struct Qdisc *q) 2141 void __netif_schedule(struct Qdisc *q)
2142 { 2142 {
2143 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) 2143 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
2144 __netif_reschedule(q); 2144 __netif_reschedule(q);
2145 } 2145 }
2146 EXPORT_SYMBOL(__netif_schedule); 2146 EXPORT_SYMBOL(__netif_schedule);
2147 2147
2148 struct dev_kfree_skb_cb { 2148 struct dev_kfree_skb_cb {
2149 enum skb_free_reason reason; 2149 enum skb_free_reason reason;
2150 }; 2150 };
2151 2151
2152 static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb) 2152 static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
2153 { 2153 {
2154 return (struct dev_kfree_skb_cb *)skb->cb; 2154 return (struct dev_kfree_skb_cb *)skb->cb;
2155 } 2155 }
2156 2156
2157 void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) 2157 void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
2158 { 2158 {
2159 unsigned long flags; 2159 unsigned long flags;
2160 2160
2161 if (likely(atomic_read(&skb->users) == 1)) { 2161 if (likely(atomic_read(&skb->users) == 1)) {
2162 smp_rmb(); 2162 smp_rmb();
2163 atomic_set(&skb->users, 0); 2163 atomic_set(&skb->users, 0);
2164 } else if (likely(!atomic_dec_and_test(&skb->users))) { 2164 } else if (likely(!atomic_dec_and_test(&skb->users))) {
2165 return; 2165 return;
2166 } 2166 }
2167 get_kfree_skb_cb(skb)->reason = reason; 2167 get_kfree_skb_cb(skb)->reason = reason;
2168 local_irq_save(flags); 2168 local_irq_save(flags);
2169 skb->next = __this_cpu_read(softnet_data.completion_queue); 2169 skb->next = __this_cpu_read(softnet_data.completion_queue);
2170 __this_cpu_write(softnet_data.completion_queue, skb); 2170 __this_cpu_write(softnet_data.completion_queue, skb);
2171 raise_softirq_irqoff(NET_TX_SOFTIRQ); 2171 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2172 local_irq_restore(flags); 2172 local_irq_restore(flags);
2173 } 2173 }
2174 EXPORT_SYMBOL(__dev_kfree_skb_irq); 2174 EXPORT_SYMBOL(__dev_kfree_skb_irq);
2175 2175
2176 void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason) 2176 void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
2177 { 2177 {
2178 if (in_irq() || irqs_disabled()) 2178 if (in_irq() || irqs_disabled())
2179 __dev_kfree_skb_irq(skb, reason); 2179 __dev_kfree_skb_irq(skb, reason);
2180 else 2180 else
2181 dev_kfree_skb(skb); 2181 dev_kfree_skb(skb);
2182 } 2182 }
2183 EXPORT_SYMBOL(__dev_kfree_skb_any); 2183 EXPORT_SYMBOL(__dev_kfree_skb_any);
2184 2184
2185 2185
2186 /** 2186 /**
2187 * netif_device_detach - mark device as removed 2187 * netif_device_detach - mark device as removed
2188 * @dev: network device 2188 * @dev: network device
2189 * 2189 *
2190 * Mark device as removed from the system and therefore no longer available. 2190 * Mark device as removed from the system and therefore no longer available.
2191 */ 2191 */
2192 void netif_device_detach(struct net_device *dev) 2192 void netif_device_detach(struct net_device *dev)
2193 { 2193 {
2194 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && 2194 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
2195 netif_running(dev)) { 2195 netif_running(dev)) {
2196 netif_tx_stop_all_queues(dev); 2196 netif_tx_stop_all_queues(dev);
2197 } 2197 }
2198 } 2198 }
2199 EXPORT_SYMBOL(netif_device_detach); 2199 EXPORT_SYMBOL(netif_device_detach);
2200 2200
2201 /** 2201 /**
2202 * netif_device_attach - mark device as attached 2202 * netif_device_attach - mark device as attached
2203 * @dev: network device 2203 * @dev: network device
2204 * 2204 *
2205 * Mark device as attached to the system and restart if needed. 2205 * Mark device as attached to the system and restart if needed.
2206 */ 2206 */
2207 void netif_device_attach(struct net_device *dev) 2207 void netif_device_attach(struct net_device *dev)
2208 { 2208 {
2209 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && 2209 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
2210 netif_running(dev)) { 2210 netif_running(dev)) {
2211 netif_tx_wake_all_queues(dev); 2211 netif_tx_wake_all_queues(dev);
2212 __netdev_watchdog_up(dev); 2212 __netdev_watchdog_up(dev);
2213 } 2213 }
2214 } 2214 }
2215 EXPORT_SYMBOL(netif_device_attach); 2215 EXPORT_SYMBOL(netif_device_attach);
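A minimal suspend/resume sketch showing the intended pairing of the two helpers above; the callbacks and names (sample_suspend, sample_resume) are hypothetical and the hardware handling is elided.

#include <linux/netdevice.h>
#include <linux/device.h>

static int sample_suspend(struct device *d)
{
	struct net_device *netdev = dev_get_drvdata(d);

	netif_device_detach(netdev);	/* stops all tx queues if running */
	/* ... quiesce DMA and power the hardware down ... */
	return 0;
}

static int sample_resume(struct device *d)
{
	struct net_device *netdev = dev_get_drvdata(d);

	/* ... power the hardware up and restore its state ... */
	netif_device_attach(netdev);	/* wakes queues and the watchdog */
	return 0;
}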
2216 2216
2217 static void skb_warn_bad_offload(const struct sk_buff *skb) 2217 static void skb_warn_bad_offload(const struct sk_buff *skb)
2218 { 2218 {
2219 static const netdev_features_t null_features = 0; 2219 static const netdev_features_t null_features = 0;
2220 struct net_device *dev = skb->dev; 2220 struct net_device *dev = skb->dev;
2221 const char *driver = ""; 2221 const char *driver = "";
2222 2222
2223 if (!net_ratelimit()) 2223 if (!net_ratelimit())
2224 return; 2224 return;
2225 2225
2226 if (dev && dev->dev.parent) 2226 if (dev && dev->dev.parent)
2227 driver = dev_driver_string(dev->dev.parent); 2227 driver = dev_driver_string(dev->dev.parent);
2228 2228
2229 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d " 2229 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
2230 "gso_type=%d ip_summed=%d\n", 2230 "gso_type=%d ip_summed=%d\n",
2231 driver, dev ? &dev->features : &null_features, 2231 driver, dev ? &dev->features : &null_features,
2232 skb->sk ? &skb->sk->sk_route_caps : &null_features, 2232 skb->sk ? &skb->sk->sk_route_caps : &null_features,
2233 skb->len, skb->data_len, skb_shinfo(skb)->gso_size, 2233 skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
2234 skb_shinfo(skb)->gso_type, skb->ip_summed); 2234 skb_shinfo(skb)->gso_type, skb->ip_summed);
2235 } 2235 }
2236 2236
2237 /* 2237 /*
2238 * Invalidate hardware checksum when packet is to be mangled, and 2238 * Invalidate hardware checksum when packet is to be mangled, and
2239 * complete checksum manually on outgoing path. 2239 * complete checksum manually on outgoing path.
2240 */ 2240 */
2241 int skb_checksum_help(struct sk_buff *skb) 2241 int skb_checksum_help(struct sk_buff *skb)
2242 { 2242 {
2243 __wsum csum; 2243 __wsum csum;
2244 int ret = 0, offset; 2244 int ret = 0, offset;
2245 2245
2246 if (skb->ip_summed == CHECKSUM_COMPLETE) 2246 if (skb->ip_summed == CHECKSUM_COMPLETE)
2247 goto out_set_summed; 2247 goto out_set_summed;
2248 2248
2249 if (unlikely(skb_shinfo(skb)->gso_size)) { 2249 if (unlikely(skb_shinfo(skb)->gso_size)) {
2250 skb_warn_bad_offload(skb); 2250 skb_warn_bad_offload(skb);
2251 return -EINVAL; 2251 return -EINVAL;
2252 } 2252 }
2253 2253
2254 /* Before computing a checksum, we should make sure no frag could 2254 /* Before computing a checksum, we should make sure no frag could
2255 * be modified by an external entity: the checksum could be wrong. 2255 * be modified by an external entity: the checksum could be wrong.
2256 */ 2256 */
2257 if (skb_has_shared_frag(skb)) { 2257 if (skb_has_shared_frag(skb)) {
2258 ret = __skb_linearize(skb); 2258 ret = __skb_linearize(skb);
2259 if (ret) 2259 if (ret)
2260 goto out; 2260 goto out;
2261 } 2261 }
2262 2262
2263 offset = skb_checksum_start_offset(skb); 2263 offset = skb_checksum_start_offset(skb);
2264 BUG_ON(offset >= skb_headlen(skb)); 2264 BUG_ON(offset >= skb_headlen(skb));
2265 csum = skb_checksum(skb, offset, skb->len - offset, 0); 2265 csum = skb_checksum(skb, offset, skb->len - offset, 0);
2266 2266
2267 offset += skb->csum_offset; 2267 offset += skb->csum_offset;
2268 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb)); 2268 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
2269 2269
2270 if (skb_cloned(skb) && 2270 if (skb_cloned(skb) &&
2271 !skb_clone_writable(skb, offset + sizeof(__sum16))) { 2271 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
2272 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); 2272 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
2273 if (ret) 2273 if (ret)
2274 goto out; 2274 goto out;
2275 } 2275 }
2276 2276
2277 *(__sum16 *)(skb->data + offset) = csum_fold(csum); 2277 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
2278 out_set_summed: 2278 out_set_summed:
2279 skb->ip_summed = CHECKSUM_NONE; 2279 skb->ip_summed = CHECKSUM_NONE;
2280 out: 2280 out:
2281 return ret; 2281 return ret;
2282 } 2282 }
2283 EXPORT_SYMBOL(skb_checksum_help); 2283 EXPORT_SYMBOL(skb_checksum_help);
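A hedged example of the situation described in the comment above: a hypothetical tx fixup (sample_tx_fixup) resolves a pending CHECKSUM_PARTIAL in software when the device cannot offload the checksum.

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static int sample_tx_fixup(struct sk_buff *skb, struct net_device *dev)
{
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    !(dev->features & NETIF_F_ALL_CSUM))
		return skb_checksum_help(skb);	/* 0 or a negative error */
	return 0;
}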
2284 2284
2285 __be16 skb_network_protocol(struct sk_buff *skb) 2285 __be16 skb_network_protocol(struct sk_buff *skb)
2286 { 2286 {
2287 __be16 type = skb->protocol; 2287 __be16 type = skb->protocol;
2288 int vlan_depth = ETH_HLEN; 2288 int vlan_depth = ETH_HLEN;
2289 2289
2290 /* Tunnel gso handlers can set protocol to ethernet. */ 2290 /* Tunnel gso handlers can set protocol to ethernet. */
2291 if (type == htons(ETH_P_TEB)) { 2291 if (type == htons(ETH_P_TEB)) {
2292 struct ethhdr *eth; 2292 struct ethhdr *eth;
2293 2293
2294 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) 2294 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
2295 return 0; 2295 return 0;
2296 2296
2297 eth = (struct ethhdr *)skb_mac_header(skb); 2297 eth = (struct ethhdr *)skb_mac_header(skb);
2298 type = eth->h_proto; 2298 type = eth->h_proto;
2299 } 2299 }
2300 2300
2301 while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { 2301 while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
2302 struct vlan_hdr *vh; 2302 struct vlan_hdr *vh;
2303 2303
2304 if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) 2304 if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
2305 return 0; 2305 return 0;
2306 2306
2307 vh = (struct vlan_hdr *)(skb->data + vlan_depth); 2307 vh = (struct vlan_hdr *)(skb->data + vlan_depth);
2308 type = vh->h_vlan_encapsulated_proto; 2308 type = vh->h_vlan_encapsulated_proto;
2309 vlan_depth += VLAN_HLEN; 2309 vlan_depth += VLAN_HLEN;
2310 } 2310 }
2311 2311
2312 return type; 2312 return type;
2313 } 2313 }
2314 2314
2315 /** 2315 /**
2316 * skb_mac_gso_segment - mac layer segmentation handler. 2316 * skb_mac_gso_segment - mac layer segmentation handler.
2317 * @skb: buffer to segment 2317 * @skb: buffer to segment
2318 * @features: features for the output path (see dev->features) 2318 * @features: features for the output path (see dev->features)
2319 */ 2319 */
2320 struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, 2320 struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
2321 netdev_features_t features) 2321 netdev_features_t features)
2322 { 2322 {
2323 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); 2323 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
2324 struct packet_offload *ptype; 2324 struct packet_offload *ptype;
2325 __be16 type = skb_network_protocol(skb); 2325 __be16 type = skb_network_protocol(skb);
2326 2326
2327 if (unlikely(!type)) 2327 if (unlikely(!type))
2328 return ERR_PTR(-EINVAL); 2328 return ERR_PTR(-EINVAL);
2329 2329
2330 __skb_pull(skb, skb->mac_len); 2330 __skb_pull(skb, skb->mac_len);
2331 2331
2332 rcu_read_lock(); 2332 rcu_read_lock();
2333 list_for_each_entry_rcu(ptype, &offload_base, list) { 2333 list_for_each_entry_rcu(ptype, &offload_base, list) {
2334 if (ptype->type == type && ptype->callbacks.gso_segment) { 2334 if (ptype->type == type && ptype->callbacks.gso_segment) {
2335 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { 2335 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
2336 int err; 2336 int err;
2337 2337
2338 err = ptype->callbacks.gso_send_check(skb); 2338 err = ptype->callbacks.gso_send_check(skb);
2339 segs = ERR_PTR(err); 2339 segs = ERR_PTR(err);
2340 if (err || skb_gso_ok(skb, features)) 2340 if (err || skb_gso_ok(skb, features))
2341 break; 2341 break;
2342 __skb_push(skb, (skb->data - 2342 __skb_push(skb, (skb->data -
2343 skb_network_header(skb))); 2343 skb_network_header(skb)));
2344 } 2344 }
2345 segs = ptype->callbacks.gso_segment(skb, features); 2345 segs = ptype->callbacks.gso_segment(skb, features);
2346 break; 2346 break;
2347 } 2347 }
2348 } 2348 }
2349 rcu_read_unlock(); 2349 rcu_read_unlock();
2350 2350
2351 __skb_push(skb, skb->data - skb_mac_header(skb)); 2351 __skb_push(skb, skb->data - skb_mac_header(skb));
2352 2352
2353 return segs; 2353 return segs;
2354 } 2354 }
2355 EXPORT_SYMBOL(skb_mac_gso_segment); 2355 EXPORT_SYMBOL(skb_mac_gso_segment);
2356 2356
2357 2357
2358 /* openvswitch calls this on rx path, so we need a different check. 2358 /* openvswitch calls this on rx path, so we need a different check.
2359 */ 2359 */
2360 static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) 2360 static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
2361 { 2361 {
2362 if (tx_path) 2362 if (tx_path)
2363 return skb->ip_summed != CHECKSUM_PARTIAL; 2363 return skb->ip_summed != CHECKSUM_PARTIAL;
2364 else 2364 else
2365 return skb->ip_summed == CHECKSUM_NONE; 2365 return skb->ip_summed == CHECKSUM_NONE;
2366 } 2366 }
2367 2367
2368 /** 2368 /**
2369 * __skb_gso_segment - Perform segmentation on skb. 2369 * __skb_gso_segment - Perform segmentation on skb.
2370 * @skb: buffer to segment 2370 * @skb: buffer to segment
2371 * @features: features for the output path (see dev->features) 2371 * @features: features for the output path (see dev->features)
2372 * @tx_path: whether it is called in TX path 2372 * @tx_path: whether it is called in TX path
2373 * 2373 *
2374 * This function segments the given skb and returns a list of segments. 2374 * This function segments the given skb and returns a list of segments.
2375 * 2375 *
2376 * It may return NULL if the skb requires no segmentation. This is 2376 * It may return NULL if the skb requires no segmentation. This is
2377 * only possible when GSO is used for verifying header integrity. 2377 * only possible when GSO is used for verifying header integrity.
2378 */ 2378 */
2379 struct sk_buff *__skb_gso_segment(struct sk_buff *skb, 2379 struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
2380 netdev_features_t features, bool tx_path) 2380 netdev_features_t features, bool tx_path)
2381 { 2381 {
2382 if (unlikely(skb_needs_check(skb, tx_path))) { 2382 if (unlikely(skb_needs_check(skb, tx_path))) {
2383 int err; 2383 int err;
2384 2384
2385 skb_warn_bad_offload(skb); 2385 skb_warn_bad_offload(skb);
2386 2386
2387 if (skb_header_cloned(skb) && 2387 if (skb_header_cloned(skb) &&
2388 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 2388 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
2389 return ERR_PTR(err); 2389 return ERR_PTR(err);
2390 } 2390 }
2391 2391
2392 SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); 2392 SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
2393 SKB_GSO_CB(skb)->encap_level = 0; 2393 SKB_GSO_CB(skb)->encap_level = 0;
2394 2394
2395 skb_reset_mac_header(skb); 2395 skb_reset_mac_header(skb);
2396 skb_reset_mac_len(skb); 2396 skb_reset_mac_len(skb);
2397 2397
2398 return skb_mac_gso_segment(skb, features); 2398 return skb_mac_gso_segment(skb, features);
2399 } 2399 }
2400 EXPORT_SYMBOL(__skb_gso_segment); 2400 EXPORT_SYMBOL(__skb_gso_segment);
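A minimal caller sketch for the function documented above, assuming a hypothetical software path (sample_segment_and_send) that would transmit each resulting segment; the actual transmit step is elided and a kfree_skb() stands in for it.

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/err.h>

static int sample_segment_and_send(struct sk_buff *skb,
				   netdev_features_t features)
{
	struct sk_buff *segs, *nskb;

	segs = __skb_gso_segment(skb, features, true);
	if (IS_ERR(segs))
		return PTR_ERR(segs);	/* caller still owns skb */
	if (!segs)
		return 0;	/* header check only; send skb unchanged (not shown) */

	consume_skb(skb);
	while (segs) {
		nskb = segs;
		segs = segs->next;
		nskb->next = NULL;
		/* ... hand nskb to the device; freed here as a placeholder ... */
		kfree_skb(nskb);
	}
	return 0;
}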
2401 2401
2402 /* Take action when hardware reception checksum errors are detected. */ 2402 /* Take action when hardware reception checksum errors are detected. */
2403 #ifdef CONFIG_BUG 2403 #ifdef CONFIG_BUG
2404 void netdev_rx_csum_fault(struct net_device *dev) 2404 void netdev_rx_csum_fault(struct net_device *dev)
2405 { 2405 {
2406 if (net_ratelimit()) { 2406 if (net_ratelimit()) {
2407 pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>"); 2407 pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
2408 dump_stack(); 2408 dump_stack();
2409 } 2409 }
2410 } 2410 }
2411 EXPORT_SYMBOL(netdev_rx_csum_fault); 2411 EXPORT_SYMBOL(netdev_rx_csum_fault);
2412 #endif 2412 #endif
2413 2413
2414 /* Actually, we should eliminate this check as soon as we know that: 2414 /* Actually, we should eliminate this check as soon as we know that:
2415 * 1. An IOMMU is present and allows mapping all the memory. 2415 * 1. An IOMMU is present and allows mapping all the memory.
2416 * 2. No high memory really exists on this machine. 2416 * 2. No high memory really exists on this machine.
2417 */ 2417 */
2418 2418
2419 static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) 2419 static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
2420 { 2420 {
2421 #ifdef CONFIG_HIGHMEM 2421 #ifdef CONFIG_HIGHMEM
2422 int i; 2422 int i;
2423 if (!(dev->features & NETIF_F_HIGHDMA)) { 2423 if (!(dev->features & NETIF_F_HIGHDMA)) {
2424 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 2424 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2425 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 2425 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2426 if (PageHighMem(skb_frag_page(frag))) 2426 if (PageHighMem(skb_frag_page(frag)))
2427 return 1; 2427 return 1;
2428 } 2428 }
2429 } 2429 }
2430 2430
2431 if (PCI_DMA_BUS_IS_PHYS) { 2431 if (PCI_DMA_BUS_IS_PHYS) {
2432 struct device *pdev = dev->dev.parent; 2432 struct device *pdev = dev->dev.parent;
2433 2433
2434 if (!pdev) 2434 if (!pdev)
2435 return 0; 2435 return 0;
2436 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 2436 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2437 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 2437 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2438 dma_addr_t addr = page_to_phys(skb_frag_page(frag)); 2438 dma_addr_t addr = page_to_phys(skb_frag_page(frag));
2439 if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask) 2439 if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
2440 return 1; 2440 return 1;
2441 } 2441 }
2442 } 2442 }
2443 #endif 2443 #endif
2444 return 0; 2444 return 0;
2445 } 2445 }
2446 2446
2447 struct dev_gso_cb { 2447 struct dev_gso_cb {
2448 void (*destructor)(struct sk_buff *skb); 2448 void (*destructor)(struct sk_buff *skb);
2449 }; 2449 };
2450 2450
2451 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) 2451 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
2452 2452
2453 static void dev_gso_skb_destructor(struct sk_buff *skb) 2453 static void dev_gso_skb_destructor(struct sk_buff *skb)
2454 { 2454 {
2455 struct dev_gso_cb *cb; 2455 struct dev_gso_cb *cb;
2456 2456
2457 do { 2457 do {
2458 struct sk_buff *nskb = skb->next; 2458 struct sk_buff *nskb = skb->next;
2459 2459
2460 skb->next = nskb->next; 2460 skb->next = nskb->next;
2461 nskb->next = NULL; 2461 nskb->next = NULL;
2462 kfree_skb(nskb); 2462 kfree_skb(nskb);
2463 } while (skb->next); 2463 } while (skb->next);
2464 2464
2465 cb = DEV_GSO_CB(skb); 2465 cb = DEV_GSO_CB(skb);
2466 if (cb->destructor) 2466 if (cb->destructor)
2467 cb->destructor(skb); 2467 cb->destructor(skb);
2468 } 2468 }
2469 2469
2470 /** 2470 /**
2471 * dev_gso_segment - Perform emulated hardware segmentation on skb. 2471 * dev_gso_segment - Perform emulated hardware segmentation on skb.
2472 * @skb: buffer to segment 2472 * @skb: buffer to segment
2473 * @features: device features as applicable to this skb 2473 * @features: device features as applicable to this skb
2474 * 2474 *
2475 * This function segments the given skb and stores the list of segments 2475 * This function segments the given skb and stores the list of segments
2476 * in skb->next. 2476 * in skb->next.
2477 */ 2477 */
2478 static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) 2478 static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
2479 { 2479 {
2480 struct sk_buff *segs; 2480 struct sk_buff *segs;
2481 2481
2482 segs = skb_gso_segment(skb, features); 2482 segs = skb_gso_segment(skb, features);
2483 2483
2484 /* Verifying header integrity only. */ 2484 /* Verifying header integrity only. */
2485 if (!segs) 2485 if (!segs)
2486 return 0; 2486 return 0;
2487 2487
2488 if (IS_ERR(segs)) 2488 if (IS_ERR(segs))
2489 return PTR_ERR(segs); 2489 return PTR_ERR(segs);
2490 2490
2491 skb->next = segs; 2491 skb->next = segs;
2492 DEV_GSO_CB(skb)->destructor = skb->destructor; 2492 DEV_GSO_CB(skb)->destructor = skb->destructor;
2493 skb->destructor = dev_gso_skb_destructor; 2493 skb->destructor = dev_gso_skb_destructor;
2494 2494
2495 return 0; 2495 return 0;
2496 } 2496 }
2497 2497
2498 static netdev_features_t harmonize_features(struct sk_buff *skb, 2498 static netdev_features_t harmonize_features(struct sk_buff *skb,
2499 netdev_features_t features) 2499 netdev_features_t features)
2500 { 2500 {
2501 if (skb->ip_summed != CHECKSUM_NONE && 2501 if (skb->ip_summed != CHECKSUM_NONE &&
2502 !can_checksum_protocol(features, skb_network_protocol(skb))) { 2502 !can_checksum_protocol(features, skb_network_protocol(skb))) {
2503 features &= ~NETIF_F_ALL_CSUM; 2503 features &= ~NETIF_F_ALL_CSUM;
2504 } else if (illegal_highdma(skb->dev, skb)) { 2504 } else if (illegal_highdma(skb->dev, skb)) {
2505 features &= ~NETIF_F_SG; 2505 features &= ~NETIF_F_SG;
2506 } 2506 }
2507 2507
2508 return features; 2508 return features;
2509 } 2509 }
2510 2510
2511 netdev_features_t netif_skb_features(struct sk_buff *skb) 2511 netdev_features_t netif_skb_features(struct sk_buff *skb)
2512 { 2512 {
2513 __be16 protocol = skb->protocol; 2513 __be16 protocol = skb->protocol;
2514 netdev_features_t features = skb->dev->features; 2514 netdev_features_t features = skb->dev->features;
2515 2515
2516 if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) 2516 if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
2517 features &= ~NETIF_F_GSO_MASK; 2517 features &= ~NETIF_F_GSO_MASK;
2518 2518
2519 if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { 2519 if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
2520 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; 2520 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
2521 protocol = veh->h_vlan_encapsulated_proto; 2521 protocol = veh->h_vlan_encapsulated_proto;
2522 } else if (!vlan_tx_tag_present(skb)) { 2522 } else if (!vlan_tx_tag_present(skb)) {
2523 return harmonize_features(skb, features); 2523 return harmonize_features(skb, features);
2524 } 2524 }
2525 2525
2526 features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | 2526 features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
2527 NETIF_F_HW_VLAN_STAG_TX); 2527 NETIF_F_HW_VLAN_STAG_TX);
2528 2528
2529 if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) 2529 if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
2530 features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | 2530 features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
2531 NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | 2531 NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
2532 NETIF_F_HW_VLAN_STAG_TX; 2532 NETIF_F_HW_VLAN_STAG_TX;
2533 2533
2534 return harmonize_features(skb, features); 2534 return harmonize_features(skb, features);
2535 } 2535 }
2536 EXPORT_SYMBOL(netif_skb_features); 2536 EXPORT_SYMBOL(netif_skb_features);
2537 2537
2538 /*
2539 * Returns true if either:
2540 * 1. skb has frag_list and the device doesn't support FRAGLIST, or
2541 * 2. skb is fragmented and the device does not support SG.
2542 */
2543 static inline int skb_needs_linearize(struct sk_buff *skb,
2544 netdev_features_t features)
2545 {
2546 return skb_is_nonlinear(skb) &&
2547 ((skb_has_frag_list(skb) &&
2548 !(features & NETIF_F_FRAGLIST)) ||
2549 (skb_shinfo(skb)->nr_frags &&
2550 !(features & NETIF_F_SG)));
2551 }
2552
2553 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, 2538 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2554 struct netdev_queue *txq, void *accel_priv) 2539 struct netdev_queue *txq, void *accel_priv)
2555 { 2540 {
2556 const struct net_device_ops *ops = dev->netdev_ops; 2541 const struct net_device_ops *ops = dev->netdev_ops;
2557 int rc = NETDEV_TX_OK; 2542 int rc = NETDEV_TX_OK;
2558 unsigned int skb_len; 2543 unsigned int skb_len;
2559 2544
2560 if (likely(!skb->next)) { 2545 if (likely(!skb->next)) {
2561 netdev_features_t features; 2546 netdev_features_t features;
2562 2547
2563 /* 2548 /*
2564 * If the device doesn't need skb->dst, release it right now while 2549 * If the device doesn't need skb->dst, release it right now while
2565 * it's still hot in this CPU's cache 2550 * it's still hot in this CPU's cache
2566 */ 2551 */
2567 if (dev->priv_flags & IFF_XMIT_DST_RELEASE) 2552 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2568 skb_dst_drop(skb); 2553 skb_dst_drop(skb);
2569 2554
2570 features = netif_skb_features(skb); 2555 features = netif_skb_features(skb);
2571 2556
2572 if (vlan_tx_tag_present(skb) && 2557 if (vlan_tx_tag_present(skb) &&
2573 !vlan_hw_offload_capable(features, skb->vlan_proto)) { 2558 !vlan_hw_offload_capable(features, skb->vlan_proto)) {
2574 skb = __vlan_put_tag(skb, skb->vlan_proto, 2559 skb = __vlan_put_tag(skb, skb->vlan_proto,
2575 vlan_tx_tag_get(skb)); 2560 vlan_tx_tag_get(skb));
2576 if (unlikely(!skb)) 2561 if (unlikely(!skb))
2577 goto out; 2562 goto out;
2578 2563
2579 skb->vlan_tci = 0; 2564 skb->vlan_tci = 0;
2580 } 2565 }
2581 2566
2582 /* If this is an encapsulation offload request, verify we are testing 2567 /* If this is an encapsulation offload request, verify we are testing
2583 * hardware encapsulation features instead of standard 2568 * hardware encapsulation features instead of standard
2584 * features for the netdev 2569 * features for the netdev
2585 */ 2570 */
2586 if (skb->encapsulation) 2571 if (skb->encapsulation)
2587 features &= dev->hw_enc_features; 2572 features &= dev->hw_enc_features;
2588 2573
2589 if (netif_needs_gso(skb, features)) { 2574 if (netif_needs_gso(skb, features)) {
2590 if (unlikely(dev_gso_segment(skb, features))) 2575 if (unlikely(dev_gso_segment(skb, features)))
2591 goto out_kfree_skb; 2576 goto out_kfree_skb;
2592 if (skb->next) 2577 if (skb->next)
2593 goto gso; 2578 goto gso;
2594 } else { 2579 } else {
2595 if (skb_needs_linearize(skb, features) && 2580 if (skb_needs_linearize(skb, features) &&
2596 __skb_linearize(skb)) 2581 __skb_linearize(skb))
2597 goto out_kfree_skb; 2582 goto out_kfree_skb;
2598 2583
2599 /* If the packet is not checksummed and the device does not 2584 /* If the packet is not checksummed and the device does not
2600 * support checksumming for this protocol, complete 2585 * support checksumming for this protocol, complete
2601 * checksumming here. 2586 * checksumming here.
2602 */ 2587 */
2603 if (skb->ip_summed == CHECKSUM_PARTIAL) { 2588 if (skb->ip_summed == CHECKSUM_PARTIAL) {
2604 if (skb->encapsulation) 2589 if (skb->encapsulation)
2605 skb_set_inner_transport_header(skb, 2590 skb_set_inner_transport_header(skb,
2606 skb_checksum_start_offset(skb)); 2591 skb_checksum_start_offset(skb));
2607 else 2592 else
2608 skb_set_transport_header(skb, 2593 skb_set_transport_header(skb,
2609 skb_checksum_start_offset(skb)); 2594 skb_checksum_start_offset(skb));
2610 if (!(features & NETIF_F_ALL_CSUM) && 2595 if (!(features & NETIF_F_ALL_CSUM) &&
2611 skb_checksum_help(skb)) 2596 skb_checksum_help(skb))
2612 goto out_kfree_skb; 2597 goto out_kfree_skb;
2613 } 2598 }
2614 } 2599 }
2615 2600
2616 if (!list_empty(&ptype_all)) 2601 if (!list_empty(&ptype_all))
2617 dev_queue_xmit_nit(skb, dev); 2602 dev_queue_xmit_nit(skb, dev);
2618 2603
2619 skb_len = skb->len; 2604 skb_len = skb->len;
2620 if (accel_priv) 2605 if (accel_priv)
2621 rc = ops->ndo_dfwd_start_xmit(skb, dev, accel_priv); 2606 rc = ops->ndo_dfwd_start_xmit(skb, dev, accel_priv);
2622 else 2607 else
2623 rc = ops->ndo_start_xmit(skb, dev); 2608 rc = ops->ndo_start_xmit(skb, dev);
2624 2609
2625 trace_net_dev_xmit(skb, rc, dev, skb_len); 2610 trace_net_dev_xmit(skb, rc, dev, skb_len);
2626 if (rc == NETDEV_TX_OK && txq) 2611 if (rc == NETDEV_TX_OK && txq)
2627 txq_trans_update(txq); 2612 txq_trans_update(txq);
2628 return rc; 2613 return rc;
2629 } 2614 }
2630 2615
2631 gso: 2616 gso:
2632 do { 2617 do {
2633 struct sk_buff *nskb = skb->next; 2618 struct sk_buff *nskb = skb->next;
2634 2619
2635 skb->next = nskb->next; 2620 skb->next = nskb->next;
2636 nskb->next = NULL; 2621 nskb->next = NULL;
2637 2622
2638 if (!list_empty(&ptype_all)) 2623 if (!list_empty(&ptype_all))
2639 dev_queue_xmit_nit(nskb, dev); 2624 dev_queue_xmit_nit(nskb, dev);
2640 2625
2641 skb_len = nskb->len; 2626 skb_len = nskb->len;
2642 if (accel_priv) 2627 if (accel_priv)
2643 rc = ops->ndo_dfwd_start_xmit(nskb, dev, accel_priv); 2628 rc = ops->ndo_dfwd_start_xmit(nskb, dev, accel_priv);
2644 else 2629 else
2645 rc = ops->ndo_start_xmit(nskb, dev); 2630 rc = ops->ndo_start_xmit(nskb, dev);
2646 trace_net_dev_xmit(nskb, rc, dev, skb_len); 2631 trace_net_dev_xmit(nskb, rc, dev, skb_len);
2647 if (unlikely(rc != NETDEV_TX_OK)) { 2632 if (unlikely(rc != NETDEV_TX_OK)) {
2648 if (rc & ~NETDEV_TX_MASK) 2633 if (rc & ~NETDEV_TX_MASK)
2649 goto out_kfree_gso_skb; 2634 goto out_kfree_gso_skb;
2650 nskb->next = skb->next; 2635 nskb->next = skb->next;
2651 skb->next = nskb; 2636 skb->next = nskb;
2652 return rc; 2637 return rc;
2653 } 2638 }
2654 txq_trans_update(txq); 2639 txq_trans_update(txq);
2655 if (unlikely(netif_xmit_stopped(txq) && skb->next)) 2640 if (unlikely(netif_xmit_stopped(txq) && skb->next))
2656 return NETDEV_TX_BUSY; 2641 return NETDEV_TX_BUSY;
2657 } while (skb->next); 2642 } while (skb->next);
2658 2643
2659 out_kfree_gso_skb: 2644 out_kfree_gso_skb:
2660 if (likely(skb->next == NULL)) { 2645 if (likely(skb->next == NULL)) {
2661 skb->destructor = DEV_GSO_CB(skb)->destructor; 2646 skb->destructor = DEV_GSO_CB(skb)->destructor;
2662 consume_skb(skb); 2647 consume_skb(skb);
2663 return rc; 2648 return rc;
2664 } 2649 }
2665 out_kfree_skb: 2650 out_kfree_skb:
2666 kfree_skb(skb); 2651 kfree_skb(skb);
2667 out: 2652 out:
2668 return rc; 2653 return rc;
2669 } 2654 }
2670 EXPORT_SYMBOL_GPL(dev_hard_start_xmit); 2655 EXPORT_SYMBOL_GPL(dev_hard_start_xmit);
2671 2656
2672 static void qdisc_pkt_len_init(struct sk_buff *skb) 2657 static void qdisc_pkt_len_init(struct sk_buff *skb)
2673 { 2658 {
2674 const struct skb_shared_info *shinfo = skb_shinfo(skb); 2659 const struct skb_shared_info *shinfo = skb_shinfo(skb);
2675 2660
2676 qdisc_skb_cb(skb)->pkt_len = skb->len; 2661 qdisc_skb_cb(skb)->pkt_len = skb->len;
2677 2662
2678 /* To get a more precise estimate of the bytes sent on the wire, 2663 /* To get a more precise estimate of the bytes sent on the wire,
2679 * we add the header size of all segments to pkt_len 2664 * we add the header size of all segments to pkt_len
2680 */ 2665 */
2681 if (shinfo->gso_size) { 2666 if (shinfo->gso_size) {
2682 unsigned int hdr_len; 2667 unsigned int hdr_len;
2683 u16 gso_segs = shinfo->gso_segs; 2668 u16 gso_segs = shinfo->gso_segs;
2684 2669
2685 /* mac layer + network layer */ 2670 /* mac layer + network layer */
2686 hdr_len = skb_transport_header(skb) - skb_mac_header(skb); 2671 hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
2687 2672
2688 /* + transport layer */ 2673 /* + transport layer */
2689 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) 2674 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
2690 hdr_len += tcp_hdrlen(skb); 2675 hdr_len += tcp_hdrlen(skb);
2691 else 2676 else
2692 hdr_len += sizeof(struct udphdr); 2677 hdr_len += sizeof(struct udphdr);
2693 2678
2694 if (shinfo->gso_type & SKB_GSO_DODGY) 2679 if (shinfo->gso_type & SKB_GSO_DODGY)
2695 gso_segs = DIV_ROUND_UP(skb->len - hdr_len, 2680 gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
2696 shinfo->gso_size); 2681 shinfo->gso_size);
2697 2682
2698 qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len; 2683 qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
2699 } 2684 }
2700 } 2685 }
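As a worked example with assumed numbers: a TCP GSO skb with 66 bytes of MAC+IP+TCP headers, gso_size 1448 and ten segments has skb->len = 66 + 10 * 1448 = 14546, so pkt_len becomes 14546 + (10 - 1) * 66 = 15140 bytes, matching the ten on-wire frames of 66 + 1448 bytes each.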
2701 2686
2702 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, 2687 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2703 struct net_device *dev, 2688 struct net_device *dev,
2704 struct netdev_queue *txq) 2689 struct netdev_queue *txq)
2705 { 2690 {
2706 spinlock_t *root_lock = qdisc_lock(q); 2691 spinlock_t *root_lock = qdisc_lock(q);
2707 bool contended; 2692 bool contended;
2708 int rc; 2693 int rc;
2709 2694
2710 qdisc_pkt_len_init(skb); 2695 qdisc_pkt_len_init(skb);
2711 qdisc_calculate_pkt_len(skb, q); 2696 qdisc_calculate_pkt_len(skb, q);
2712 /* 2697 /*
2713 * Heuristic to force contended enqueues to serialize on a 2698 * Heuristic to force contended enqueues to serialize on a
2714 * separate lock before trying to get the qdisc main lock. 2699 * separate lock before trying to get the qdisc main lock.
2715 * This permits the __QDISC_STATE_RUNNING owner to get the lock more often 2700 * This permits the __QDISC_STATE_RUNNING owner to get the lock more often
2716 * and dequeue packets faster. 2701 * and dequeue packets faster.
2717 */ 2702 */
2718 contended = qdisc_is_running(q); 2703 contended = qdisc_is_running(q);
2719 if (unlikely(contended)) 2704 if (unlikely(contended))
2720 spin_lock(&q->busylock); 2705 spin_lock(&q->busylock);
2721 2706
2722 spin_lock(root_lock); 2707 spin_lock(root_lock);
2723 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { 2708 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
2724 kfree_skb(skb); 2709 kfree_skb(skb);
2725 rc = NET_XMIT_DROP; 2710 rc = NET_XMIT_DROP;
2726 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && 2711 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
2727 qdisc_run_begin(q)) { 2712 qdisc_run_begin(q)) {
2728 /* 2713 /*
2729 * This is a work-conserving queue; there are no old skbs 2714 * This is a work-conserving queue; there are no old skbs
2730 * waiting to be sent out; and the qdisc is not running - 2715 * waiting to be sent out; and the qdisc is not running -
2731 * xmit the skb directly. 2716 * xmit the skb directly.
2732 */ 2717 */
2733 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) 2718 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2734 skb_dst_force(skb); 2719 skb_dst_force(skb);
2735 2720
2736 qdisc_bstats_update(q, skb); 2721 qdisc_bstats_update(q, skb);
2737 2722
2738 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { 2723 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
2739 if (unlikely(contended)) { 2724 if (unlikely(contended)) {
2740 spin_unlock(&q->busylock); 2725 spin_unlock(&q->busylock);
2741 contended = false; 2726 contended = false;
2742 } 2727 }
2743 __qdisc_run(q); 2728 __qdisc_run(q);
2744 } else 2729 } else
2745 qdisc_run_end(q); 2730 qdisc_run_end(q);
2746 2731
2747 rc = NET_XMIT_SUCCESS; 2732 rc = NET_XMIT_SUCCESS;
2748 } else { 2733 } else {
2749 skb_dst_force(skb); 2734 skb_dst_force(skb);
2750 rc = q->enqueue(skb, q) & NET_XMIT_MASK; 2735 rc = q->enqueue(skb, q) & NET_XMIT_MASK;
2751 if (qdisc_run_begin(q)) { 2736 if (qdisc_run_begin(q)) {
2752 if (unlikely(contended)) { 2737 if (unlikely(contended)) {
2753 spin_unlock(&q->busylock); 2738 spin_unlock(&q->busylock);
2754 contended = false; 2739 contended = false;
2755 } 2740 }
2756 __qdisc_run(q); 2741 __qdisc_run(q);
2757 } 2742 }
2758 } 2743 }
2759 spin_unlock(root_lock); 2744 spin_unlock(root_lock);
2760 if (unlikely(contended)) 2745 if (unlikely(contended))
2761 spin_unlock(&q->busylock); 2746 spin_unlock(&q->busylock);
2762 return rc; 2747 return rc;
2763 } 2748 }
2764 2749
2765 #if IS_ENABLED(CONFIG_NETPRIO_CGROUP) 2750 #if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
2766 static void skb_update_prio(struct sk_buff *skb) 2751 static void skb_update_prio(struct sk_buff *skb)
2767 { 2752 {
2768 struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); 2753 struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
2769 2754
2770 if (!skb->priority && skb->sk && map) { 2755 if (!skb->priority && skb->sk && map) {
2771 unsigned int prioidx = skb->sk->sk_cgrp_prioidx; 2756 unsigned int prioidx = skb->sk->sk_cgrp_prioidx;
2772 2757
2773 if (prioidx < map->priomap_len) 2758 if (prioidx < map->priomap_len)
2774 skb->priority = map->priomap[prioidx]; 2759 skb->priority = map->priomap[prioidx];
2775 } 2760 }
2776 } 2761 }
2777 #else 2762 #else
2778 #define skb_update_prio(skb) 2763 #define skb_update_prio(skb)
2779 #endif 2764 #endif
2780 2765
2781 static DEFINE_PER_CPU(int, xmit_recursion); 2766 static DEFINE_PER_CPU(int, xmit_recursion);
2782 #define RECURSION_LIMIT 10 2767 #define RECURSION_LIMIT 10
2783 2768
2784 /** 2769 /**
2785 * dev_loopback_xmit - loop back @skb 2770 * dev_loopback_xmit - loop back @skb
2786 * @skb: buffer to transmit 2771 * @skb: buffer to transmit
2787 */ 2772 */
2788 int dev_loopback_xmit(struct sk_buff *skb) 2773 int dev_loopback_xmit(struct sk_buff *skb)
2789 { 2774 {
2790 skb_reset_mac_header(skb); 2775 skb_reset_mac_header(skb);
2791 __skb_pull(skb, skb_network_offset(skb)); 2776 __skb_pull(skb, skb_network_offset(skb));
2792 skb->pkt_type = PACKET_LOOPBACK; 2777 skb->pkt_type = PACKET_LOOPBACK;
2793 skb->ip_summed = CHECKSUM_UNNECESSARY; 2778 skb->ip_summed = CHECKSUM_UNNECESSARY;
2794 WARN_ON(!skb_dst(skb)); 2779 WARN_ON(!skb_dst(skb));
2795 skb_dst_force(skb); 2780 skb_dst_force(skb);
2796 netif_rx_ni(skb); 2781 netif_rx_ni(skb);
2797 return 0; 2782 return 0;
2798 } 2783 }
2799 EXPORT_SYMBOL(dev_loopback_xmit); 2784 EXPORT_SYMBOL(dev_loopback_xmit);
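/*
 * A hedged sketch of one way an output path might use dev_loopback_xmit():
 * clone the outgoing skb and feed the copy back into the local receive path.
 * The example_ name is hypothetical; only skb_clone() and dev_loopback_xmit()
 * are real APIs, the skb is assumed to already carry a dst
 * (dev_loopback_xmit() warns otherwise), and the usual
 * <linux/skbuff.h>/<linux/netdevice.h> includes are assumed.
 */
static void example_loop_copy_to_local_stack(struct sk_buff *skb)
{
	struct sk_buff *copy = skb_clone(skb, GFP_ATOMIC);

	if (copy)
		dev_loopback_xmit(copy);	/* marks PACKET_LOOPBACK and hands it to netif_rx_ni() */
}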
2800 2785
2801 /** 2786 /**
2802 * dev_queue_xmit - transmit a buffer 2787 * dev_queue_xmit - transmit a buffer
2803 * @skb: buffer to transmit 2788 * @skb: buffer to transmit
2804 * 2789 *
2805 * Queue a buffer for transmission to a network device. The caller must 2790 * Queue a buffer for transmission to a network device. The caller must
2806 * have set the device and priority and built the buffer before calling 2791 * have set the device and priority and built the buffer before calling
2807 * this function. The function can be called from an interrupt. 2792 * this function. The function can be called from an interrupt.
2808 * 2793 *
2809 * A negative errno code is returned on a failure. A success does not 2794 * A negative errno code is returned on a failure. A success does not
2810 * guarantee the frame will be transmitted as it may be dropped due 2795 * guarantee the frame will be transmitted as it may be dropped due
2811 * to congestion or traffic shaping. 2796 * to congestion or traffic shaping.
2812 * 2797 *
2813 * ----------------------------------------------------------------------------------- 2798 * -----------------------------------------------------------------------------------
2814 * I notice this method can also return errors from the queue disciplines, 2799 * I notice this method can also return errors from the queue disciplines,
2815 * including NET_XMIT_DROP, which is a positive value. So, errors can also 2800 * including NET_XMIT_DROP, which is a positive value. So, errors can also
2816 * be positive. 2801 * be positive.
2817 * 2802 *
2818 * Regardless of the return value, the skb is consumed, so it is currently 2803 * Regardless of the return value, the skb is consumed, so it is currently
2819 * difficult to retry a send to this method. (You can bump the ref count 2804 * difficult to retry a send to this method. (You can bump the ref count
2820 * before sending to hold a reference for retry if you are careful.) 2805 * before sending to hold a reference for retry if you are careful.)
2821 * 2806 *
2822 * When calling this method, interrupts MUST be enabled. This is because 2807 * When calling this method, interrupts MUST be enabled. This is because
2823 * the BH enable code must have IRQs enabled so that it will not deadlock. 2808 * the BH enable code must have IRQs enabled so that it will not deadlock.
2824 * --BLG 2809 * --BLG
2825 */ 2810 */
2826 int dev_queue_xmit(struct sk_buff *skb) 2811 int dev_queue_xmit(struct sk_buff *skb)
2827 { 2812 {
2828 struct net_device *dev = skb->dev; 2813 struct net_device *dev = skb->dev;
2829 struct netdev_queue *txq; 2814 struct netdev_queue *txq;
2830 struct Qdisc *q; 2815 struct Qdisc *q;
2831 int rc = -ENOMEM; 2816 int rc = -ENOMEM;
2832 2817
2833 skb_reset_mac_header(skb); 2818 skb_reset_mac_header(skb);
2834 2819
2835 /* Disable soft irqs for various locks below. Also 2820 /* Disable soft irqs for various locks below. Also
2836 * stops preemption for RCU. 2821 * stops preemption for RCU.
2837 */ 2822 */
2838 rcu_read_lock_bh(); 2823 rcu_read_lock_bh();
2839 2824
2840 skb_update_prio(skb); 2825 skb_update_prio(skb);
2841 2826
2842 txq = netdev_pick_tx(dev, skb); 2827 txq = netdev_pick_tx(dev, skb);
2843 q = rcu_dereference_bh(txq->qdisc); 2828 q = rcu_dereference_bh(txq->qdisc);
2844 2829
2845 #ifdef CONFIG_NET_CLS_ACT 2830 #ifdef CONFIG_NET_CLS_ACT
2846 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); 2831 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
2847 #endif 2832 #endif
2848 trace_net_dev_queue(skb); 2833 trace_net_dev_queue(skb);
2849 if (q->enqueue) { 2834 if (q->enqueue) {
2850 rc = __dev_xmit_skb(skb, q, dev, txq); 2835 rc = __dev_xmit_skb(skb, q, dev, txq);
2851 goto out; 2836 goto out;
2852 } 2837 }
2853 2838
2854 /* The device has no queue. Common case for software devices: 2839 /* The device has no queue. Common case for software devices:
2855 loopback, all the sorts of tunnels... 2840 loopback, all the sorts of tunnels...
2856 2841
2857 Really, it is unlikely that netif_tx_lock protection is necessary 2842 Really, it is unlikely that netif_tx_lock protection is necessary
2858 here. (e.g. loopback and IP tunnels are clean, ignoring statistics 2843 here. (e.g. loopback and IP tunnels are clean, ignoring statistics
2859 counters.) 2844 counters.)
2860 However, it is possible that they rely on the protection 2845 However, it is possible that they rely on the protection
2861 we provide here. 2846 we provide here.
2862 2847
2863 Check this and take the lock. It is not prone to deadlocks. 2848 Check this and take the lock. It is not prone to deadlocks.
2864 Or use the noqueue qdisc path, which is even simpler 8) 2849 Or use the noqueue qdisc path, which is even simpler 8)
2865 */ 2850 */
2866 if (dev->flags & IFF_UP) { 2851 if (dev->flags & IFF_UP) {
2867 int cpu = smp_processor_id(); /* ok because BHs are off */ 2852 int cpu = smp_processor_id(); /* ok because BHs are off */
2868 2853
2869 if (txq->xmit_lock_owner != cpu) { 2854 if (txq->xmit_lock_owner != cpu) {
2870 2855
2871 if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT) 2856 if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
2872 goto recursion_alert; 2857 goto recursion_alert;
2873 2858
2874 HARD_TX_LOCK(dev, txq, cpu); 2859 HARD_TX_LOCK(dev, txq, cpu);
2875 2860
2876 if (!netif_xmit_stopped(txq)) { 2861 if (!netif_xmit_stopped(txq)) {
2877 __this_cpu_inc(xmit_recursion); 2862 __this_cpu_inc(xmit_recursion);
2878 rc = dev_hard_start_xmit(skb, dev, txq, NULL); 2863 rc = dev_hard_start_xmit(skb, dev, txq, NULL);
2879 __this_cpu_dec(xmit_recursion); 2864 __this_cpu_dec(xmit_recursion);
2880 if (dev_xmit_complete(rc)) { 2865 if (dev_xmit_complete(rc)) {
2881 HARD_TX_UNLOCK(dev, txq); 2866 HARD_TX_UNLOCK(dev, txq);
2882 goto out; 2867 goto out;
2883 } 2868 }
2884 } 2869 }
2885 HARD_TX_UNLOCK(dev, txq); 2870 HARD_TX_UNLOCK(dev, txq);
2886 net_crit_ratelimited("Virtual device %s asks to queue packet!\n", 2871 net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
2887 dev->name); 2872 dev->name);
2888 } else { 2873 } else {
2889 /* Recursion is detected! It is possible, 2874 /* Recursion is detected! It is possible,
2890 * unfortunately 2875 * unfortunately
2891 */ 2876 */
2892 recursion_alert: 2877 recursion_alert:
2893 net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", 2878 net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
2894 dev->name); 2879 dev->name);
2895 } 2880 }
2896 } 2881 }
2897 2882
2898 rc = -ENETDOWN; 2883 rc = -ENETDOWN;
2899 rcu_read_unlock_bh(); 2884 rcu_read_unlock_bh();
2900 2885
2901 kfree_skb(skb); 2886 kfree_skb(skb);
2902 return rc; 2887 return rc;
2903 out: 2888 out:
2904 rcu_read_unlock_bh(); 2889 rcu_read_unlock_bh();
2905 return rc; 2890 return rc;
2906 } 2891 }
2907 EXPORT_SYMBOL(dev_queue_xmit); 2892 EXPORT_SYMBOL(dev_queue_xmit);
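/*
 * A minimal sketch of a dev_queue_xmit() caller, following the kernel-doc
 * above: the caller sets skb->dev and skb->priority, builds the frame, and
 * must expect both negative errno values and positive NET_XMIT_* codes. The
 * example_ name is hypothetical, frame is assumed to hold a complete
 * link-layer frame, and the usual networking includes are assumed.
 */
static int example_send_frame(struct net_device *dev,
			      const void *frame, unsigned int len)
{
	struct sk_buff *skb;
	int rc;

	skb = alloc_skb(len, GFP_ATOMIC);
	if (!skb)
		return -ENOMEM;

	memcpy(skb_put(skb, len), frame, len);
	skb->dev = dev;
	skb->priority = TC_PRIO_CONTROL;	/* whatever priority the caller wants */

	rc = dev_queue_xmit(skb);		/* consumes skb regardless of the outcome */
	if (rc < 0)
		return rc;			/* negative errno */
	return rc == NET_XMIT_SUCCESS ? 0 : -ENOBUFS;	/* positive NET_XMIT_* code */
}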
2908 2893
2909 2894
2910 /*======================================================================= 2895 /*=======================================================================
2911 Receiver routines 2896 Receiver routines
2912 =======================================================================*/ 2897 =======================================================================*/
2913 2898
2914 int netdev_max_backlog __read_mostly = 1000; 2899 int netdev_max_backlog __read_mostly = 1000;
2915 EXPORT_SYMBOL(netdev_max_backlog); 2900 EXPORT_SYMBOL(netdev_max_backlog);
2916 2901
2917 int netdev_tstamp_prequeue __read_mostly = 1; 2902 int netdev_tstamp_prequeue __read_mostly = 1;
2918 int netdev_budget __read_mostly = 300; 2903 int netdev_budget __read_mostly = 300;
2919 int weight_p __read_mostly = 64; /* old backlog weight */ 2904 int weight_p __read_mostly = 64; /* old backlog weight */
2920 2905
2921 /* Called with irq disabled */ 2906 /* Called with irq disabled */
2922 static inline void ____napi_schedule(struct softnet_data *sd, 2907 static inline void ____napi_schedule(struct softnet_data *sd,
2923 struct napi_struct *napi) 2908 struct napi_struct *napi)
2924 { 2909 {
2925 list_add_tail(&napi->poll_list, &sd->poll_list); 2910 list_add_tail(&napi->poll_list, &sd->poll_list);
2926 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 2911 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2927 } 2912 }
2928 2913
2929 #ifdef CONFIG_RPS 2914 #ifdef CONFIG_RPS
2930 2915
2931 /* One global table that all flow-based protocols share. */ 2916 /* One global table that all flow-based protocols share. */
2932 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; 2917 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
2933 EXPORT_SYMBOL(rps_sock_flow_table); 2918 EXPORT_SYMBOL(rps_sock_flow_table);
2934 2919
2935 struct static_key rps_needed __read_mostly; 2920 struct static_key rps_needed __read_mostly;
2936 2921
2937 static struct rps_dev_flow * 2922 static struct rps_dev_flow *
2938 set_rps_cpu(struct net_device *dev, struct sk_buff *skb, 2923 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2939 struct rps_dev_flow *rflow, u16 next_cpu) 2924 struct rps_dev_flow *rflow, u16 next_cpu)
2940 { 2925 {
2941 if (next_cpu != RPS_NO_CPU) { 2926 if (next_cpu != RPS_NO_CPU) {
2942 #ifdef CONFIG_RFS_ACCEL 2927 #ifdef CONFIG_RFS_ACCEL
2943 struct netdev_rx_queue *rxqueue; 2928 struct netdev_rx_queue *rxqueue;
2944 struct rps_dev_flow_table *flow_table; 2929 struct rps_dev_flow_table *flow_table;
2945 struct rps_dev_flow *old_rflow; 2930 struct rps_dev_flow *old_rflow;
2946 u32 flow_id; 2931 u32 flow_id;
2947 u16 rxq_index; 2932 u16 rxq_index;
2948 int rc; 2933 int rc;
2949 2934
2950 /* Should we steer this flow to a different hardware queue? */ 2935 /* Should we steer this flow to a different hardware queue? */
2951 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap || 2936 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
2952 !(dev->features & NETIF_F_NTUPLE)) 2937 !(dev->features & NETIF_F_NTUPLE))
2953 goto out; 2938 goto out;
2954 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu); 2939 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
2955 if (rxq_index == skb_get_rx_queue(skb)) 2940 if (rxq_index == skb_get_rx_queue(skb))
2956 goto out; 2941 goto out;
2957 2942
2958 rxqueue = dev->_rx + rxq_index; 2943 rxqueue = dev->_rx + rxq_index;
2959 flow_table = rcu_dereference(rxqueue->rps_flow_table); 2944 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2960 if (!flow_table) 2945 if (!flow_table)
2961 goto out; 2946 goto out;
2962 flow_id = skb->rxhash & flow_table->mask; 2947 flow_id = skb->rxhash & flow_table->mask;
2963 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, 2948 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
2964 rxq_index, flow_id); 2949 rxq_index, flow_id);
2965 if (rc < 0) 2950 if (rc < 0)
2966 goto out; 2951 goto out;
2967 old_rflow = rflow; 2952 old_rflow = rflow;
2968 rflow = &flow_table->flows[flow_id]; 2953 rflow = &flow_table->flows[flow_id];
2969 rflow->filter = rc; 2954 rflow->filter = rc;
2970 if (old_rflow->filter == rflow->filter) 2955 if (old_rflow->filter == rflow->filter)
2971 old_rflow->filter = RPS_NO_FILTER; 2956 old_rflow->filter = RPS_NO_FILTER;
2972 out: 2957 out:
2973 #endif 2958 #endif
2974 rflow->last_qtail = 2959 rflow->last_qtail =
2975 per_cpu(softnet_data, next_cpu).input_queue_head; 2960 per_cpu(softnet_data, next_cpu).input_queue_head;
2976 } 2961 }
2977 2962
2978 rflow->cpu = next_cpu; 2963 rflow->cpu = next_cpu;
2979 return rflow; 2964 return rflow;
2980 } 2965 }
2981 2966
2982 /* 2967 /*
2983 * get_rps_cpu is called from netif_receive_skb and returns the target 2968 * get_rps_cpu is called from netif_receive_skb and returns the target
2984 * CPU from the RPS map of the receiving queue for a given skb. 2969 * CPU from the RPS map of the receiving queue for a given skb.
2985 * rcu_read_lock must be held on entry. 2970 * rcu_read_lock must be held on entry.
2986 */ 2971 */
2987 static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, 2972 static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2988 struct rps_dev_flow **rflowp) 2973 struct rps_dev_flow **rflowp)
2989 { 2974 {
2990 struct netdev_rx_queue *rxqueue; 2975 struct netdev_rx_queue *rxqueue;
2991 struct rps_map *map; 2976 struct rps_map *map;
2992 struct rps_dev_flow_table *flow_table; 2977 struct rps_dev_flow_table *flow_table;
2993 struct rps_sock_flow_table *sock_flow_table; 2978 struct rps_sock_flow_table *sock_flow_table;
2994 int cpu = -1; 2979 int cpu = -1;
2995 u16 tcpu; 2980 u16 tcpu;
2996 2981
2997 if (skb_rx_queue_recorded(skb)) { 2982 if (skb_rx_queue_recorded(skb)) {
2998 u16 index = skb_get_rx_queue(skb); 2983 u16 index = skb_get_rx_queue(skb);
2999 if (unlikely(index >= dev->real_num_rx_queues)) { 2984 if (unlikely(index >= dev->real_num_rx_queues)) {
3000 WARN_ONCE(dev->real_num_rx_queues > 1, 2985 WARN_ONCE(dev->real_num_rx_queues > 1,
3001 "%s received packet on queue %u, but number " 2986 "%s received packet on queue %u, but number "
3002 "of RX queues is %u\n", 2987 "of RX queues is %u\n",
3003 dev->name, index, dev->real_num_rx_queues); 2988 dev->name, index, dev->real_num_rx_queues);
3004 goto done; 2989 goto done;
3005 } 2990 }
3006 rxqueue = dev->_rx + index; 2991 rxqueue = dev->_rx + index;
3007 } else 2992 } else
3008 rxqueue = dev->_rx; 2993 rxqueue = dev->_rx;
3009 2994
3010 map = rcu_dereference(rxqueue->rps_map); 2995 map = rcu_dereference(rxqueue->rps_map);
3011 if (map) { 2996 if (map) {
3012 if (map->len == 1 && 2997 if (map->len == 1 &&
3013 !rcu_access_pointer(rxqueue->rps_flow_table)) { 2998 !rcu_access_pointer(rxqueue->rps_flow_table)) {
3014 tcpu = map->cpus[0]; 2999 tcpu = map->cpus[0];
3015 if (cpu_online(tcpu)) 3000 if (cpu_online(tcpu))
3016 cpu = tcpu; 3001 cpu = tcpu;
3017 goto done; 3002 goto done;
3018 } 3003 }
3019 } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) { 3004 } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
3020 goto done; 3005 goto done;
3021 } 3006 }
3022 3007
3023 skb_reset_network_header(skb); 3008 skb_reset_network_header(skb);
3024 if (!skb_get_rxhash(skb)) 3009 if (!skb_get_rxhash(skb))
3025 goto done; 3010 goto done;
3026 3011
3027 flow_table = rcu_dereference(rxqueue->rps_flow_table); 3012 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3028 sock_flow_table = rcu_dereference(rps_sock_flow_table); 3013 sock_flow_table = rcu_dereference(rps_sock_flow_table);
3029 if (flow_table && sock_flow_table) { 3014 if (flow_table && sock_flow_table) {
3030 u16 next_cpu; 3015 u16 next_cpu;
3031 struct rps_dev_flow *rflow; 3016 struct rps_dev_flow *rflow;
3032 3017
3033 rflow = &flow_table->flows[skb->rxhash & flow_table->mask]; 3018 rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
3034 tcpu = rflow->cpu; 3019 tcpu = rflow->cpu;
3035 3020
3036 next_cpu = sock_flow_table->ents[skb->rxhash & 3021 next_cpu = sock_flow_table->ents[skb->rxhash &
3037 sock_flow_table->mask]; 3022 sock_flow_table->mask];
3038 3023
3039 /* 3024 /*
3040 * If the desired CPU (where last recvmsg was done) is 3025 * If the desired CPU (where last recvmsg was done) is
3041 * different from current CPU (one in the rx-queue flow 3026 * different from current CPU (one in the rx-queue flow
3042 * table entry), switch if one of the following holds: 3027 * table entry), switch if one of the following holds:
3043 * - Current CPU is unset (equal to RPS_NO_CPU). 3028 * - Current CPU is unset (equal to RPS_NO_CPU).
3044 * - Current CPU is offline. 3029 * - Current CPU is offline.
3045 * - The current CPU's queue tail has advanced beyond the 3030 * - The current CPU's queue tail has advanced beyond the
3046 * last packet that was enqueued using this table entry. 3031 * last packet that was enqueued using this table entry.
3047 * This guarantees that all previous packets for the flow 3032 * This guarantees that all previous packets for the flow
3048 * have been dequeued, thus preserving in order delivery. 3033 * have been dequeued, thus preserving in order delivery.
3049 */ 3034 */
3050 if (unlikely(tcpu != next_cpu) && 3035 if (unlikely(tcpu != next_cpu) &&
3051 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || 3036 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
3052 ((int)(per_cpu(softnet_data, tcpu).input_queue_head - 3037 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
3053 rflow->last_qtail)) >= 0)) { 3038 rflow->last_qtail)) >= 0)) {
3054 tcpu = next_cpu; 3039 tcpu = next_cpu;
3055 rflow = set_rps_cpu(dev, skb, rflow, next_cpu); 3040 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
3056 } 3041 }
3057 3042
3058 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { 3043 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
3059 *rflowp = rflow; 3044 *rflowp = rflow;
3060 cpu = tcpu; 3045 cpu = tcpu;
3061 goto done; 3046 goto done;
3062 } 3047 }
3063 } 3048 }
3064 3049
3065 if (map) { 3050 if (map) {
3066 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; 3051 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
3067 3052
3068 if (cpu_online(tcpu)) { 3053 if (cpu_online(tcpu)) {
3069 cpu = tcpu; 3054 cpu = tcpu;
3070 goto done; 3055 goto done;
3071 } 3056 }
3072 } 3057 }
3073 3058
3074 done: 3059 done:
3075 return cpu; 3060 return cpu;
3076 } 3061 }
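/*
 * A standalone illustration of the "(int)(head - last_qtail) >= 0" test used
 * above (and again in rps_may_expire_flow() below): subtracting two
 * free-running unsigned counters and casting to signed int gives a
 * wraparound-safe "has the queue head advanced past last_qtail?" check.
 * Assumes 32-bit unsigned int.
 */
#include <stdio.h>

int main(void)
{
	unsigned int head = 5;			/* input_queue_head after wrapping past UINT_MAX */
	unsigned int last_qtail = 4294967280u;	/* 0xfffffff0, recorded just before the wrap */

	/* head < last_qtail numerically, but the signed difference is +21,
	 * so the head is correctly seen as having advanced past last_qtail.
	 */
	printf("advanced: %d\n", (int)(head - last_qtail) >= 0);
	return 0;
}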
3077 3062
3078 #ifdef CONFIG_RFS_ACCEL 3063 #ifdef CONFIG_RFS_ACCEL
3079 3064
3080 /** 3065 /**
3081 * rps_may_expire_flow - check whether an RFS hardware filter may be removed 3066 * rps_may_expire_flow - check whether an RFS hardware filter may be removed
3082 * @dev: Device on which the filter was set 3067 * @dev: Device on which the filter was set
3083 * @rxq_index: RX queue index 3068 * @rxq_index: RX queue index
3084 * @flow_id: Flow ID passed to ndo_rx_flow_steer() 3069 * @flow_id: Flow ID passed to ndo_rx_flow_steer()
3085 * @filter_id: Filter ID returned by ndo_rx_flow_steer() 3070 * @filter_id: Filter ID returned by ndo_rx_flow_steer()
3086 * 3071 *
3087 * Drivers that implement ndo_rx_flow_steer() should periodically call 3072 * Drivers that implement ndo_rx_flow_steer() should periodically call
3088 * this function for each installed filter and remove the filters for 3073 * this function for each installed filter and remove the filters for
3089 * which it returns %true. 3074 * which it returns %true.
3090 */ 3075 */
3091 bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, 3076 bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
3092 u32 flow_id, u16 filter_id) 3077 u32 flow_id, u16 filter_id)
3093 { 3078 {
3094 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; 3079 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
3095 struct rps_dev_flow_table *flow_table; 3080 struct rps_dev_flow_table *flow_table;
3096 struct rps_dev_flow *rflow; 3081 struct rps_dev_flow *rflow;
3097 bool expire = true; 3082 bool expire = true;
3098 int cpu; 3083 int cpu;
3099 3084
3100 rcu_read_lock(); 3085 rcu_read_lock();
3101 flow_table = rcu_dereference(rxqueue->rps_flow_table); 3086 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3102 if (flow_table && flow_id <= flow_table->mask) { 3087 if (flow_table && flow_id <= flow_table->mask) {
3103 rflow = &flow_table->flows[flow_id]; 3088 rflow = &flow_table->flows[flow_id];
3104 cpu = ACCESS_ONCE(rflow->cpu); 3089 cpu = ACCESS_ONCE(rflow->cpu);
3105 if (rflow->filter == filter_id && cpu != RPS_NO_CPU && 3090 if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
3106 ((int)(per_cpu(softnet_data, cpu).input_queue_head - 3091 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
3107 rflow->last_qtail) < 3092 rflow->last_qtail) <
3108 (int)(10 * flow_table->mask))) 3093 (int)(10 * flow_table->mask)))
3109 expire = false; 3094 expire = false;
3110 } 3095 }
3111 rcu_read_unlock(); 3096 rcu_read_unlock();
3112 return expire; 3097 return expire;
3113 } 3098 }
3114 EXPORT_SYMBOL(rps_may_expire_flow); 3099 EXPORT_SYMBOL(rps_may_expire_flow);
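/*
 * A sketch of the periodic expiry scan that the kernel-doc above asks drivers
 * for. The filter bookkeeping (struct example_filter and its fields) is
 * hypothetical; only rps_may_expire_flow() and its argument order are taken
 * from the code above, and the usual kernel includes are assumed.
 */
struct example_filter {
	bool	inserted;
	u16	rxq_index;	/* RX queue the flow was steered to */
	u32	flow_id;	/* flow_id passed to ndo_rx_flow_steer() */
	u16	filter_id;	/* value returned from ndo_rx_flow_steer() */
};

static void example_expire_filters(struct net_device *dev,
				   struct example_filter *filters,
				   unsigned int num_filters)
{
	unsigned int i;

	for (i = 0; i < num_filters; i++) {
		if (!filters[i].inserted)
			continue;
		if (rps_may_expire_flow(dev, filters[i].rxq_index,
					filters[i].flow_id,
					filters[i].filter_id)) {
			/* ... remove the hardware filter, then ... */
			filters[i].inserted = false;
		}
	}
}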
3115 3100
3116 #endif /* CONFIG_RFS_ACCEL */ 3101 #endif /* CONFIG_RFS_ACCEL */
3117 3102
3118 /* Called from hardirq (IPI) context */ 3103 /* Called from hardirq (IPI) context */
3119 static void rps_trigger_softirq(void *data) 3104 static void rps_trigger_softirq(void *data)
3120 { 3105 {
3121 struct softnet_data *sd = data; 3106 struct softnet_data *sd = data;
3122 3107
3123 ____napi_schedule(sd, &sd->backlog); 3108 ____napi_schedule(sd, &sd->backlog);
3124 sd->received_rps++; 3109 sd->received_rps++;
3125 } 3110 }
3126 3111
3127 #endif /* CONFIG_RPS */ 3112 #endif /* CONFIG_RPS */
3128 3113
3129 /* 3114 /*
3130 * Check if this softnet_data structure belongs to another CPU. 3115 * Check if this softnet_data structure belongs to another CPU.
3131 * If so, queue it on our IPI list and return 1. 3116 * If so, queue it on our IPI list and return 1.
3132 * Otherwise, return 0. 3117 * Otherwise, return 0.
3133 */ 3118 */
3134 static int rps_ipi_queued(struct softnet_data *sd) 3119 static int rps_ipi_queued(struct softnet_data *sd)
3135 { 3120 {
3136 #ifdef CONFIG_RPS 3121 #ifdef CONFIG_RPS
3137 struct softnet_data *mysd = &__get_cpu_var(softnet_data); 3122 struct softnet_data *mysd = &__get_cpu_var(softnet_data);
3138 3123
3139 if (sd != mysd) { 3124 if (sd != mysd) {
3140 sd->rps_ipi_next = mysd->rps_ipi_list; 3125 sd->rps_ipi_next = mysd->rps_ipi_list;
3141 mysd->rps_ipi_list = sd; 3126 mysd->rps_ipi_list = sd;
3142 3127
3143 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 3128 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3144 return 1; 3129 return 1;
3145 } 3130 }
3146 #endif /* CONFIG_RPS */ 3131 #endif /* CONFIG_RPS */
3147 return 0; 3132 return 0;
3148 } 3133 }
3149 3134
3150 #ifdef CONFIG_NET_FLOW_LIMIT 3135 #ifdef CONFIG_NET_FLOW_LIMIT
3151 int netdev_flow_limit_table_len __read_mostly = (1 << 12); 3136 int netdev_flow_limit_table_len __read_mostly = (1 << 12);
3152 #endif 3137 #endif
3153 3138
3154 static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) 3139 static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
3155 { 3140 {
3156 #ifdef CONFIG_NET_FLOW_LIMIT 3141 #ifdef CONFIG_NET_FLOW_LIMIT
3157 struct sd_flow_limit *fl; 3142 struct sd_flow_limit *fl;
3158 struct softnet_data *sd; 3143 struct softnet_data *sd;
3159 unsigned int old_flow, new_flow; 3144 unsigned int old_flow, new_flow;
3160 3145
3161 if (qlen < (netdev_max_backlog >> 1)) 3146 if (qlen < (netdev_max_backlog >> 1))
3162 return false; 3147 return false;
3163 3148
3164 sd = &__get_cpu_var(softnet_data); 3149 sd = &__get_cpu_var(softnet_data);
3165 3150
3166 rcu_read_lock(); 3151 rcu_read_lock();
3167 fl = rcu_dereference(sd->flow_limit); 3152 fl = rcu_dereference(sd->flow_limit);
3168 if (fl) { 3153 if (fl) {
3169 new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1); 3154 new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1);
3170 old_flow = fl->history[fl->history_head]; 3155 old_flow = fl->history[fl->history_head];
3171 fl->history[fl->history_head] = new_flow; 3156 fl->history[fl->history_head] = new_flow;
3172 3157
3173 fl->history_head++; 3158 fl->history_head++;
3174 fl->history_head &= FLOW_LIMIT_HISTORY - 1; 3159 fl->history_head &= FLOW_LIMIT_HISTORY - 1;
3175 3160
3176 if (likely(fl->buckets[old_flow])) 3161 if (likely(fl->buckets[old_flow]))
3177 fl->buckets[old_flow]--; 3162 fl->buckets[old_flow]--;
3178 3163
3179 if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) { 3164 if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
3180 fl->count++; 3165 fl->count++;
3181 rcu_read_unlock(); 3166 rcu_read_unlock();
3182 return true; 3167 return true;
3183 } 3168 }
3184 } 3169 }
3185 rcu_read_unlock(); 3170 rcu_read_unlock();
3186 #endif 3171 #endif
3187 return false; 3172 return false;
3188 } 3173 }
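/*
 * A standalone sketch of the bookkeeping skb_flow_limit() performs above: a
 * fixed-size history of recent flow buckets plus per-bucket counters, so a
 * single flow that owns more than half of the recent history gets flagged.
 * Sizes and names are made up for illustration and the struct is assumed to
 * start out zeroed; this is not the kernel data structure.
 */
#include <stdbool.h>

#define DEMO_HISTORY_LEN	128	/* must be a power of two */
#define DEMO_NUM_BUCKETS	64	/* must be a power of two */

struct flow_limit_demo {
	unsigned int	history_head;
	unsigned short	history[DEMO_HISTORY_LEN];	/* ring of recent bucket indices */
	unsigned short	buckets[DEMO_NUM_BUCKETS];	/* occurrences of each bucket in the ring */
};

static bool demo_flow_over_limit(struct flow_limit_demo *fl, unsigned int flow_hash)
{
	unsigned int new_flow = flow_hash & (DEMO_NUM_BUCKETS - 1);
	unsigned int old_flow = fl->history[fl->history_head];

	/* Replace the oldest history entry with the new flow's bucket. */
	fl->history[fl->history_head] = new_flow;
	fl->history_head = (fl->history_head + 1) & (DEMO_HISTORY_LEN - 1);

	if (fl->buckets[old_flow])
		fl->buckets[old_flow]--;

	/* Flag the flow once it accounts for more than half of the history. */
	return ++fl->buckets[new_flow] > (DEMO_HISTORY_LEN >> 1);
}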
3189 3174
3190 /* 3175 /*
3191 * enqueue_to_backlog is called to queue an skb to a per CPU backlog 3176 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
3192 * queue (may be a remote CPU queue). 3177 * queue (may be a remote CPU queue).
3193 */ 3178 */
3194 static int enqueue_to_backlog(struct sk_buff *skb, int cpu, 3179 static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
3195 unsigned int *qtail) 3180 unsigned int *qtail)
3196 { 3181 {
3197 struct softnet_data *sd; 3182 struct softnet_data *sd;
3198 unsigned long flags; 3183 unsigned long flags;
3199 unsigned int qlen; 3184 unsigned int qlen;
3200 3185
3201 sd = &per_cpu(softnet_data, cpu); 3186 sd = &per_cpu(softnet_data, cpu);
3202 3187
3203 local_irq_save(flags); 3188 local_irq_save(flags);
3204 3189
3205 rps_lock(sd); 3190 rps_lock(sd);
3206 qlen = skb_queue_len(&sd->input_pkt_queue); 3191 qlen = skb_queue_len(&sd->input_pkt_queue);
3207 if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { 3192 if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
3208 if (skb_queue_len(&sd->input_pkt_queue)) { 3193 if (skb_queue_len(&sd->input_pkt_queue)) {
3209 enqueue: 3194 enqueue:
3210 __skb_queue_tail(&sd->input_pkt_queue, skb); 3195 __skb_queue_tail(&sd->input_pkt_queue, skb);
3211 input_queue_tail_incr_save(sd, qtail); 3196 input_queue_tail_incr_save(sd, qtail);
3212 rps_unlock(sd); 3197 rps_unlock(sd);
3213 local_irq_restore(flags); 3198 local_irq_restore(flags);
3214 return NET_RX_SUCCESS; 3199 return NET_RX_SUCCESS;
3215 } 3200 }
3216 3201
3217 /* Schedule NAPI for backlog device 3202 /* Schedule NAPI for backlog device
3218 * We can use a non-atomic operation since we own the queue lock 3203 * We can use a non-atomic operation since we own the queue lock
3219 */ 3204 */
3220 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) { 3205 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
3221 if (!rps_ipi_queued(sd)) 3206 if (!rps_ipi_queued(sd))
3222 ____napi_schedule(sd, &sd->backlog); 3207 ____napi_schedule(sd, &sd->backlog);
3223 } 3208 }
3224 goto enqueue; 3209 goto enqueue;
3225 } 3210 }
3226 3211
3227 sd->dropped++; 3212 sd->dropped++;
3228 rps_unlock(sd); 3213 rps_unlock(sd);
3229 3214
3230 local_irq_restore(flags); 3215 local_irq_restore(flags);
3231 3216
3232 atomic_long_inc(&skb->dev->rx_dropped); 3217 atomic_long_inc(&skb->dev->rx_dropped);
3233 kfree_skb(skb); 3218 kfree_skb(skb);
3234 return NET_RX_DROP; 3219 return NET_RX_DROP;
3235 } 3220 }
3236 3221
3237 /** 3222 /**
3238 * netif_rx - post buffer to the network code 3223 * netif_rx - post buffer to the network code
3239 * @skb: buffer to post 3224 * @skb: buffer to post
3240 * 3225 *
3241 * This function receives a packet from a device driver and queues it for 3226 * This function receives a packet from a device driver and queues it for
3242 * the upper (protocol) levels to process. It always succeeds. The buffer 3227 * the upper (protocol) levels to process. It always succeeds. The buffer
3243 * may be dropped during processing for congestion control or by the 3228 * may be dropped during processing for congestion control or by the
3244 * protocol layers. 3229 * protocol layers.
3245 * 3230 *
3246 * return values: 3231 * return values:
3247 * NET_RX_SUCCESS (no congestion) 3232 * NET_RX_SUCCESS (no congestion)
3248 * NET_RX_DROP (packet was dropped) 3233 * NET_RX_DROP (packet was dropped)
3249 * 3234 *
3250 */ 3235 */
3251 3236
3252 int netif_rx(struct sk_buff *skb) 3237 int netif_rx(struct sk_buff *skb)
3253 { 3238 {
3254 int ret; 3239 int ret;
3255 3240
3256 /* if netpoll wants it, pretend we never saw it */ 3241 /* if netpoll wants it, pretend we never saw it */
3257 if (netpoll_rx(skb)) 3242 if (netpoll_rx(skb))
3258 return NET_RX_DROP; 3243 return NET_RX_DROP;
3259 3244
3260 net_timestamp_check(netdev_tstamp_prequeue, skb); 3245 net_timestamp_check(netdev_tstamp_prequeue, skb);
3261 3246
3262 trace_netif_rx(skb); 3247 trace_netif_rx(skb);
3263 #ifdef CONFIG_RPS 3248 #ifdef CONFIG_RPS
3264 if (static_key_false(&rps_needed)) { 3249 if (static_key_false(&rps_needed)) {
3265 struct rps_dev_flow voidflow, *rflow = &voidflow; 3250 struct rps_dev_flow voidflow, *rflow = &voidflow;
3266 int cpu; 3251 int cpu;
3267 3252
3268 preempt_disable(); 3253 preempt_disable();
3269 rcu_read_lock(); 3254 rcu_read_lock();
3270 3255
3271 cpu = get_rps_cpu(skb->dev, skb, &rflow); 3256 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3272 if (cpu < 0) 3257 if (cpu < 0)
3273 cpu = smp_processor_id(); 3258 cpu = smp_processor_id();
3274 3259
3275 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); 3260 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3276 3261
3277 rcu_read_unlock(); 3262 rcu_read_unlock();
3278 preempt_enable(); 3263 preempt_enable();
3279 } else 3264 } else
3280 #endif 3265 #endif
3281 { 3266 {
3282 unsigned int qtail; 3267 unsigned int qtail;
3283 ret = enqueue_to_backlog(skb, get_cpu(), &qtail); 3268 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
3284 put_cpu(); 3269 put_cpu();
3285 } 3270 }
3286 return ret; 3271 return ret;
3287 } 3272 }
3288 EXPORT_SYMBOL(netif_rx); 3273 EXPORT_SYMBOL(netif_rx);
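/*
 * A sketch of the classic netif_rx() caller: a non-NAPI driver copying a
 * received frame out of its RX buffer from the interrupt handler. The
 * example_ name is hypothetical; netdev_alloc_skb(), skb_put(),
 * eth_type_trans() and netif_rx() are the real APIs (usual includes assumed).
 */
static void example_rx_interrupt(struct net_device *dev,
				 const void *rx_buf, unsigned int pkt_len)
{
	struct sk_buff *skb;

	skb = netdev_alloc_skb(dev, pkt_len + NET_IP_ALIGN);
	if (!skb) {
		dev->stats.rx_dropped++;
		return;
	}

	skb_reserve(skb, NET_IP_ALIGN);			/* align the IP header */
	memcpy(skb_put(skb, pkt_len), rx_buf, pkt_len);
	skb->protocol = eth_type_trans(skb, dev);	/* also sets skb->dev and pkt_type */

	netif_rx(skb);					/* queue for the backlog / RPS target CPU */
}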
3289 3274
3290 int netif_rx_ni(struct sk_buff *skb) 3275 int netif_rx_ni(struct sk_buff *skb)
3291 { 3276 {
3292 int err; 3277 int err;
3293 3278
3294 preempt_disable(); 3279 preempt_disable();
3295 err = netif_rx(skb); 3280 err = netif_rx(skb);
3296 if (local_softirq_pending()) 3281 if (local_softirq_pending())
3297 do_softirq(); 3282 do_softirq();
3298 preempt_enable(); 3283 preempt_enable();
3299 3284
3300 return err; 3285 return err;
3301 } 3286 }
3302 EXPORT_SYMBOL(netif_rx_ni); 3287 EXPORT_SYMBOL(netif_rx_ni);
3303 3288
3304 static void net_tx_action(struct softirq_action *h) 3289 static void net_tx_action(struct softirq_action *h)
3305 { 3290 {
3306 struct softnet_data *sd = &__get_cpu_var(softnet_data); 3291 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3307 3292
3308 if (sd->completion_queue) { 3293 if (sd->completion_queue) {
3309 struct sk_buff *clist; 3294 struct sk_buff *clist;
3310 3295
3311 local_irq_disable(); 3296 local_irq_disable();
3312 clist = sd->completion_queue; 3297 clist = sd->completion_queue;
3313 sd->completion_queue = NULL; 3298 sd->completion_queue = NULL;
3314 local_irq_enable(); 3299 local_irq_enable();
3315 3300
3316 while (clist) { 3301 while (clist) {
3317 struct sk_buff *skb = clist; 3302 struct sk_buff *skb = clist;
3318 clist = clist->next; 3303 clist = clist->next;
3319 3304
3320 WARN_ON(atomic_read(&skb->users)); 3305 WARN_ON(atomic_read(&skb->users));
3321 if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) 3306 if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
3322 trace_consume_skb(skb); 3307 trace_consume_skb(skb);
3323 else 3308 else
3324 trace_kfree_skb(skb, net_tx_action); 3309 trace_kfree_skb(skb, net_tx_action);
3325 __kfree_skb(skb); 3310 __kfree_skb(skb);
3326 } 3311 }
3327 } 3312 }
3328 3313
3329 if (sd->output_queue) { 3314 if (sd->output_queue) {
3330 struct Qdisc *head; 3315 struct Qdisc *head;
3331 3316
3332 local_irq_disable(); 3317 local_irq_disable();
3333 head = sd->output_queue; 3318 head = sd->output_queue;
3334 sd->output_queue = NULL; 3319 sd->output_queue = NULL;
3335 sd->output_queue_tailp = &sd->output_queue; 3320 sd->output_queue_tailp = &sd->output_queue;
3336 local_irq_enable(); 3321 local_irq_enable();
3337 3322
3338 while (head) { 3323 while (head) {
3339 struct Qdisc *q = head; 3324 struct Qdisc *q = head;
3340 spinlock_t *root_lock; 3325 spinlock_t *root_lock;
3341 3326
3342 head = head->next_sched; 3327 head = head->next_sched;
3343 3328
3344 root_lock = qdisc_lock(q); 3329 root_lock = qdisc_lock(q);
3345 if (spin_trylock(root_lock)) { 3330 if (spin_trylock(root_lock)) {
3346 smp_mb__before_clear_bit(); 3331 smp_mb__before_clear_bit();
3347 clear_bit(__QDISC_STATE_SCHED, 3332 clear_bit(__QDISC_STATE_SCHED,
3348 &q->state); 3333 &q->state);
3349 qdisc_run(q); 3334 qdisc_run(q);
3350 spin_unlock(root_lock); 3335 spin_unlock(root_lock);
3351 } else { 3336 } else {
3352 if (!test_bit(__QDISC_STATE_DEACTIVATED, 3337 if (!test_bit(__QDISC_STATE_DEACTIVATED,
3353 &q->state)) { 3338 &q->state)) {
3354 __netif_reschedule(q); 3339 __netif_reschedule(q);
3355 } else { 3340 } else {
3356 smp_mb__before_clear_bit(); 3341 smp_mb__before_clear_bit();
3357 clear_bit(__QDISC_STATE_SCHED, 3342 clear_bit(__QDISC_STATE_SCHED,
3358 &q->state); 3343 &q->state);
3359 } 3344 }
3360 } 3345 }
3361 } 3346 }
3362 } 3347 }
3363 } 3348 }
3364 3349
3365 #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ 3350 #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
3366 (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) 3351 (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
3367 /* This hook is defined here for ATM LANE */ 3352 /* This hook is defined here for ATM LANE */
3368 int (*br_fdb_test_addr_hook)(struct net_device *dev, 3353 int (*br_fdb_test_addr_hook)(struct net_device *dev,
3369 unsigned char *addr) __read_mostly; 3354 unsigned char *addr) __read_mostly;
3370 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); 3355 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
3371 #endif 3356 #endif
3372 3357
3373 #ifdef CONFIG_NET_CLS_ACT 3358 #ifdef CONFIG_NET_CLS_ACT
3374 /* TODO: Maybe we should just force sch_ingress to be compiled in 3359 /* TODO: Maybe we should just force sch_ingress to be compiled in
3375 * when CONFIG_NET_CLS_ACT is? Otherwise we pay for some useless instructions 3360 * when CONFIG_NET_CLS_ACT is? Otherwise we pay for some useless instructions
3376 * (a compare and 2 extra stores) right now if we don't have it on 3361 * (a compare and 2 extra stores) right now if we don't have it on
3377 * but do have CONFIG_NET_CLS_ACT. 3362 * but do have CONFIG_NET_CLS_ACT.
3378 * NOTE: This doesn't stop any functionality; if you don't have 3363 * NOTE: This doesn't stop any functionality; if you don't have
3379 * the ingress scheduler, you just can't add policies on ingress. 3364 * the ingress scheduler, you just can't add policies on ingress.
3380 * 3365 *
3381 */ 3366 */
3382 static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) 3367 static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
3383 { 3368 {
3384 struct net_device *dev = skb->dev; 3369 struct net_device *dev = skb->dev;
3385 u32 ttl = G_TC_RTTL(skb->tc_verd); 3370 u32 ttl = G_TC_RTTL(skb->tc_verd);
3386 int result = TC_ACT_OK; 3371 int result = TC_ACT_OK;
3387 struct Qdisc *q; 3372 struct Qdisc *q;
3388 3373
3389 if (unlikely(MAX_RED_LOOP < ttl++)) { 3374 if (unlikely(MAX_RED_LOOP < ttl++)) {
3390 net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n", 3375 net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
3391 skb->skb_iif, dev->ifindex); 3376 skb->skb_iif, dev->ifindex);
3392 return TC_ACT_SHOT; 3377 return TC_ACT_SHOT;
3393 } 3378 }
3394 3379
3395 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); 3380 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
3396 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); 3381 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3397 3382
3398 q = rxq->qdisc; 3383 q = rxq->qdisc;
3399 if (q != &noop_qdisc) { 3384 if (q != &noop_qdisc) {
3400 spin_lock(qdisc_lock(q)); 3385 spin_lock(qdisc_lock(q));
3401 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) 3386 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
3402 result = qdisc_enqueue_root(skb, q); 3387 result = qdisc_enqueue_root(skb, q);
3403 spin_unlock(qdisc_lock(q)); 3388 spin_unlock(qdisc_lock(q));
3404 } 3389 }
3405 3390
3406 return result; 3391 return result;
3407 } 3392 }
3408 3393
3409 static inline struct sk_buff *handle_ing(struct sk_buff *skb, 3394 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
3410 struct packet_type **pt_prev, 3395 struct packet_type **pt_prev,
3411 int *ret, struct net_device *orig_dev) 3396 int *ret, struct net_device *orig_dev)
3412 { 3397 {
3413 struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); 3398 struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
3414 3399
3415 if (!rxq || rxq->qdisc == &noop_qdisc) 3400 if (!rxq || rxq->qdisc == &noop_qdisc)
3416 goto out; 3401 goto out;
3417 3402
3418 if (*pt_prev) { 3403 if (*pt_prev) {
3419 *ret = deliver_skb(skb, *pt_prev, orig_dev); 3404 *ret = deliver_skb(skb, *pt_prev, orig_dev);
3420 *pt_prev = NULL; 3405 *pt_prev = NULL;
3421 } 3406 }
3422 3407
3423 switch (ing_filter(skb, rxq)) { 3408 switch (ing_filter(skb, rxq)) {
3424 case TC_ACT_SHOT: 3409 case TC_ACT_SHOT:
3425 case TC_ACT_STOLEN: 3410 case TC_ACT_STOLEN:
3426 kfree_skb(skb); 3411 kfree_skb(skb);
3427 return NULL; 3412 return NULL;
3428 } 3413 }
3429 3414
3430 out: 3415 out:
3431 skb->tc_verd = 0; 3416 skb->tc_verd = 0;
3432 return skb; 3417 return skb;
3433 } 3418 }
3434 #endif 3419 #endif
3435 3420
3436 /** 3421 /**
3437 * netdev_rx_handler_register - register receive handler 3422 * netdev_rx_handler_register - register receive handler
3438 * @dev: device to register a handler for 3423 * @dev: device to register a handler for
3439 * @rx_handler: receive handler to register 3424 * @rx_handler: receive handler to register
3440 * @rx_handler_data: data pointer that is used by rx handler 3425 * @rx_handler_data: data pointer that is used by rx handler
3441 * 3426 *
3442 * Register a receive handler for a device. This handler will then be 3427 * Register a receive handler for a device. This handler will then be
3443 * called from __netif_receive_skb. A negative errno code is returned 3428 * called from __netif_receive_skb. A negative errno code is returned
3444 * on a failure. 3429 * on a failure.
3445 * 3430 *
3446 * The caller must hold the rtnl_mutex. 3431 * The caller must hold the rtnl_mutex.
3447 * 3432 *
3448 * For a general description of rx_handler, see enum rx_handler_result. 3433 * For a general description of rx_handler, see enum rx_handler_result.
3449 */ 3434 */
3450 int netdev_rx_handler_register(struct net_device *dev, 3435 int netdev_rx_handler_register(struct net_device *dev,
3451 rx_handler_func_t *rx_handler, 3436 rx_handler_func_t *rx_handler,
3452 void *rx_handler_data) 3437 void *rx_handler_data)
3453 { 3438 {
3454 ASSERT_RTNL(); 3439 ASSERT_RTNL();
3455 3440
3456 if (dev->rx_handler) 3441 if (dev->rx_handler)
3457 return -EBUSY; 3442 return -EBUSY;
3458 3443
3459 /* Note: rx_handler_data must be set before rx_handler */ 3444 /* Note: rx_handler_data must be set before rx_handler */
3460 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); 3445 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
3461 rcu_assign_pointer(dev->rx_handler, rx_handler); 3446 rcu_assign_pointer(dev->rx_handler, rx_handler);
3462 3447
3463 return 0; 3448 return 0;
3464 } 3449 }
3465 EXPORT_SYMBOL_GPL(netdev_rx_handler_register); 3450 EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
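/*
 * A sketch of registering an rx_handler as described above. The handler
 * signature (struct sk_buff **pskb returning rx_handler_result_t) and the
 * RTNL requirement come from the code and kernel-doc above; the example_
 * names and the private data layout are hypothetical.
 */
static rx_handler_result_t example_rx_handler(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;
	void *priv = rcu_dereference(skb->dev->rx_handler_data);

	/* ... inspect or steal skb, possibly replacing *pskb ... */
	(void)priv;

	return RX_HANDLER_PASS;		/* let __netif_receive_skb_core() continue */
}

static int example_attach(struct net_device *dev, void *priv)
{
	int err;

	rtnl_lock();
	err = netdev_rx_handler_register(dev, example_rx_handler, priv);
	rtnl_unlock();

	return err;	/* -EBUSY if another handler is already registered */
}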
3466 3451
3467 /** 3452 /**
3468 * netdev_rx_handler_unregister - unregister receive handler 3453 * netdev_rx_handler_unregister - unregister receive handler
3469 * @dev: device to unregister a handler from 3454 * @dev: device to unregister a handler from
3470 * 3455 *
3471 * Unregister a receive handler from a device. 3456 * Unregister a receive handler from a device.
3472 * 3457 *
3473 * The caller must hold the rtnl_mutex. 3458 * The caller must hold the rtnl_mutex.
3474 */ 3459 */
3475 void netdev_rx_handler_unregister(struct net_device *dev) 3460 void netdev_rx_handler_unregister(struct net_device *dev)
3476 { 3461 {
3477 3462
3478 ASSERT_RTNL(); 3463 ASSERT_RTNL();
3479 RCU_INIT_POINTER(dev->rx_handler, NULL); 3464 RCU_INIT_POINTER(dev->rx_handler, NULL);
3480 /* a reader seeing a non-NULL rx_handler in an rcu_read_lock() 3465 /* a reader seeing a non-NULL rx_handler in an rcu_read_lock()
3481 * section is guaranteed to see a non-NULL rx_handler_data 3466 * section is guaranteed to see a non-NULL rx_handler_data
3482 * as well. 3467 * as well.
3483 */ 3468 */
3484 synchronize_net(); 3469 synchronize_net();
3485 RCU_INIT_POINTER(dev->rx_handler_data, NULL); 3470 RCU_INIT_POINTER(dev->rx_handler_data, NULL);
3486 } 3471 }
3487 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); 3472 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
3488 3473
3489 /* 3474 /*
3490 * Limit the use of PFMEMALLOC reserves to those protocols that implement 3475 * Limit the use of PFMEMALLOC reserves to those protocols that implement
3491 * the special handling of PFMEMALLOC skbs. 3476 * the special handling of PFMEMALLOC skbs.
3492 */ 3477 */
3493 static bool skb_pfmemalloc_protocol(struct sk_buff *skb) 3478 static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
3494 { 3479 {
3495 switch (skb->protocol) { 3480 switch (skb->protocol) {
3496 case __constant_htons(ETH_P_ARP): 3481 case __constant_htons(ETH_P_ARP):
3497 case __constant_htons(ETH_P_IP): 3482 case __constant_htons(ETH_P_IP):
3498 case __constant_htons(ETH_P_IPV6): 3483 case __constant_htons(ETH_P_IPV6):
3499 case __constant_htons(ETH_P_8021Q): 3484 case __constant_htons(ETH_P_8021Q):
3500 case __constant_htons(ETH_P_8021AD): 3485 case __constant_htons(ETH_P_8021AD):
3501 return true; 3486 return true;
3502 default: 3487 default:
3503 return false; 3488 return false;
3504 } 3489 }
3505 } 3490 }
3506 3491
3507 static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) 3492 static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
3508 { 3493 {
3509 struct packet_type *ptype, *pt_prev; 3494 struct packet_type *ptype, *pt_prev;
3510 rx_handler_func_t *rx_handler; 3495 rx_handler_func_t *rx_handler;
3511 struct net_device *orig_dev; 3496 struct net_device *orig_dev;
3512 struct net_device *null_or_dev; 3497 struct net_device *null_or_dev;
3513 bool deliver_exact = false; 3498 bool deliver_exact = false;
3514 int ret = NET_RX_DROP; 3499 int ret = NET_RX_DROP;
3515 __be16 type; 3500 __be16 type;
3516 3501
3517 net_timestamp_check(!netdev_tstamp_prequeue, skb); 3502 net_timestamp_check(!netdev_tstamp_prequeue, skb);
3518 3503
3519 trace_netif_receive_skb(skb); 3504 trace_netif_receive_skb(skb);
3520 3505
3521 /* if we've gotten here through NAPI, check netpoll */ 3506 /* if we've gotten here through NAPI, check netpoll */
3522 if (netpoll_receive_skb(skb)) 3507 if (netpoll_receive_skb(skb))
3523 goto out; 3508 goto out;
3524 3509
3525 orig_dev = skb->dev; 3510 orig_dev = skb->dev;
3526 3511
3527 skb_reset_network_header(skb); 3512 skb_reset_network_header(skb);
3528 if (!skb_transport_header_was_set(skb)) 3513 if (!skb_transport_header_was_set(skb))
3529 skb_reset_transport_header(skb); 3514 skb_reset_transport_header(skb);
3530 skb_reset_mac_len(skb); 3515 skb_reset_mac_len(skb);
3531 3516
3532 pt_prev = NULL; 3517 pt_prev = NULL;
3533 3518
3534 rcu_read_lock(); 3519 rcu_read_lock();
3535 3520
3536 another_round: 3521 another_round:
3537 skb->skb_iif = skb->dev->ifindex; 3522 skb->skb_iif = skb->dev->ifindex;
3538 3523
3539 __this_cpu_inc(softnet_data.processed); 3524 __this_cpu_inc(softnet_data.processed);
3540 3525
3541 if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || 3526 if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
3542 skb->protocol == cpu_to_be16(ETH_P_8021AD)) { 3527 skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
3543 skb = vlan_untag(skb); 3528 skb = vlan_untag(skb);
3544 if (unlikely(!skb)) 3529 if (unlikely(!skb))
3545 goto unlock; 3530 goto unlock;
3546 } 3531 }
3547 3532
3548 #ifdef CONFIG_NET_CLS_ACT 3533 #ifdef CONFIG_NET_CLS_ACT
3549 if (skb->tc_verd & TC_NCLS) { 3534 if (skb->tc_verd & TC_NCLS) {
3550 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); 3535 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
3551 goto ncls; 3536 goto ncls;
3552 } 3537 }
3553 #endif 3538 #endif
3554 3539
3555 if (pfmemalloc) 3540 if (pfmemalloc)
3556 goto skip_taps; 3541 goto skip_taps;
3557 3542
3558 list_for_each_entry_rcu(ptype, &ptype_all, list) { 3543 list_for_each_entry_rcu(ptype, &ptype_all, list) {
3559 if (!ptype->dev || ptype->dev == skb->dev) { 3544 if (!ptype->dev || ptype->dev == skb->dev) {
3560 if (pt_prev) 3545 if (pt_prev)
3561 ret = deliver_skb(skb, pt_prev, orig_dev); 3546 ret = deliver_skb(skb, pt_prev, orig_dev);
3562 pt_prev = ptype; 3547 pt_prev = ptype;
3563 } 3548 }
3564 } 3549 }
3565 3550
3566 skip_taps: 3551 skip_taps:
3567 #ifdef CONFIG_NET_CLS_ACT 3552 #ifdef CONFIG_NET_CLS_ACT
3568 skb = handle_ing(skb, &pt_prev, &ret, orig_dev); 3553 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
3569 if (!skb) 3554 if (!skb)
3570 goto unlock; 3555 goto unlock;
3571 ncls: 3556 ncls:
3572 #endif 3557 #endif
3573 3558
3574 if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) 3559 if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
3575 goto drop; 3560 goto drop;
3576 3561
3577 if (vlan_tx_tag_present(skb)) { 3562 if (vlan_tx_tag_present(skb)) {
3578 if (pt_prev) { 3563 if (pt_prev) {
3579 ret = deliver_skb(skb, pt_prev, orig_dev); 3564 ret = deliver_skb(skb, pt_prev, orig_dev);
3580 pt_prev = NULL; 3565 pt_prev = NULL;
3581 } 3566 }
3582 if (vlan_do_receive(&skb)) 3567 if (vlan_do_receive(&skb))
3583 goto another_round; 3568 goto another_round;
3584 else if (unlikely(!skb)) 3569 else if (unlikely(!skb))
3585 goto unlock; 3570 goto unlock;
3586 } 3571 }
3587 3572
3588 rx_handler = rcu_dereference(skb->dev->rx_handler); 3573 rx_handler = rcu_dereference(skb->dev->rx_handler);
3589 if (rx_handler) { 3574 if (rx_handler) {
3590 if (pt_prev) { 3575 if (pt_prev) {
3591 ret = deliver_skb(skb, pt_prev, orig_dev); 3576 ret = deliver_skb(skb, pt_prev, orig_dev);
3592 pt_prev = NULL; 3577 pt_prev = NULL;
3593 } 3578 }
3594 switch (rx_handler(&skb)) { 3579 switch (rx_handler(&skb)) {
3595 case RX_HANDLER_CONSUMED: 3580 case RX_HANDLER_CONSUMED:
3596 ret = NET_RX_SUCCESS; 3581 ret = NET_RX_SUCCESS;
3597 goto unlock; 3582 goto unlock;
3598 case RX_HANDLER_ANOTHER: 3583 case RX_HANDLER_ANOTHER:
3599 goto another_round; 3584 goto another_round;
3600 case RX_HANDLER_EXACT: 3585 case RX_HANDLER_EXACT:
3601 deliver_exact = true; 3586 deliver_exact = true;
3602 case RX_HANDLER_PASS: 3587 case RX_HANDLER_PASS:
3603 break; 3588 break;
3604 default: 3589 default:
3605 BUG(); 3590 BUG();
3606 } 3591 }
3607 } 3592 }
3608 3593
3609 if (unlikely(vlan_tx_tag_present(skb))) { 3594 if (unlikely(vlan_tx_tag_present(skb))) {
3610 if (vlan_tx_tag_get_id(skb)) 3595 if (vlan_tx_tag_get_id(skb))
3611 skb->pkt_type = PACKET_OTHERHOST; 3596 skb->pkt_type = PACKET_OTHERHOST;
3612 /* Note: we might in the future use prio bits 3597 /* Note: we might in the future use prio bits
3613 * and set skb->priority like in vlan_do_receive() 3598 * and set skb->priority like in vlan_do_receive()
3614 * For the time being, just ignore Priority Code Point 3599 * For the time being, just ignore Priority Code Point
3615 */ 3600 */
3616 skb->vlan_tci = 0; 3601 skb->vlan_tci = 0;
3617 } 3602 }
3618 3603
3619 /* deliver only exact match when indicated */ 3604 /* deliver only exact match when indicated */
3620 null_or_dev = deliver_exact ? skb->dev : NULL; 3605 null_or_dev = deliver_exact ? skb->dev : NULL;
3621 3606
3622 type = skb->protocol; 3607 type = skb->protocol;
3623 list_for_each_entry_rcu(ptype, 3608 list_for_each_entry_rcu(ptype,
3624 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { 3609 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
3625 if (ptype->type == type && 3610 if (ptype->type == type &&
3626 (ptype->dev == null_or_dev || ptype->dev == skb->dev || 3611 (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
3627 ptype->dev == orig_dev)) { 3612 ptype->dev == orig_dev)) {
3628 if (pt_prev) 3613 if (pt_prev)
3629 ret = deliver_skb(skb, pt_prev, orig_dev); 3614 ret = deliver_skb(skb, pt_prev, orig_dev);
3630 pt_prev = ptype; 3615 pt_prev = ptype;
3631 } 3616 }
3632 } 3617 }
3633 3618
3634 if (pt_prev) { 3619 if (pt_prev) {
3635 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) 3620 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
3636 goto drop; 3621 goto drop;
3637 else 3622 else
3638 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 3623 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
3639 } else { 3624 } else {
3640 drop: 3625 drop:
3641 atomic_long_inc(&skb->dev->rx_dropped); 3626 atomic_long_inc(&skb->dev->rx_dropped);
3642 kfree_skb(skb); 3627 kfree_skb(skb);
3643 /* Jamal, now you will not be able to escape explaining 3628 /* Jamal, now you will not be able to escape explaining
3644 * to me how you were going to use this. :-) 3629 * to me how you were going to use this. :-)
3645 */ 3630 */
3646 ret = NET_RX_DROP; 3631 ret = NET_RX_DROP;
3647 } 3632 }
3648 3633
3649 unlock: 3634 unlock:
3650 rcu_read_unlock(); 3635 rcu_read_unlock();
3651 out: 3636 out:
3652 return ret; 3637 return ret;
3653 } 3638 }
3654 3639
3655 static int __netif_receive_skb(struct sk_buff *skb) 3640 static int __netif_receive_skb(struct sk_buff *skb)
3656 { 3641 {
3657 int ret; 3642 int ret;
3658 3643
3659 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) { 3644 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
3660 unsigned long pflags = current->flags; 3645 unsigned long pflags = current->flags;
3661 3646
3662 /* 3647 /*
3663 * PFMEMALLOC skbs are special, they should 3648 * PFMEMALLOC skbs are special, they should
3664 * - be delivered to SOCK_MEMALLOC sockets only 3649 * - be delivered to SOCK_MEMALLOC sockets only
3665 * - stay away from userspace 3650 * - stay away from userspace
3666 * - have bounded memory usage 3651 * - have bounded memory usage
3667 * 3652 *
3668 * Use PF_MEMALLOC as this saves us from propagating the allocation 3653 * Use PF_MEMALLOC as this saves us from propagating the allocation
3669 * context down to all allocation sites. 3654 * context down to all allocation sites.
3670 */ 3655 */
3671 current->flags |= PF_MEMALLOC; 3656 current->flags |= PF_MEMALLOC;
3672 ret = __netif_receive_skb_core(skb, true); 3657 ret = __netif_receive_skb_core(skb, true);
3673 tsk_restore_flags(current, pflags, PF_MEMALLOC); 3658 tsk_restore_flags(current, pflags, PF_MEMALLOC);
3674 } else 3659 } else
3675 ret = __netif_receive_skb_core(skb, false); 3660 ret = __netif_receive_skb_core(skb, false);
3676 3661
3677 return ret; 3662 return ret;
3678 } 3663 }
3679 3664
3680 /** 3665 /**
3681 * netif_receive_skb - process receive buffer from network 3666 * netif_receive_skb - process receive buffer from network
3682 * @skb: buffer to process 3667 * @skb: buffer to process
3683 * 3668 *
3684 * netif_receive_skb() is the main receive data processing function. 3669 * netif_receive_skb() is the main receive data processing function.
3685 * It always succeeds. The buffer may be dropped during processing 3670 * It always succeeds. The buffer may be dropped during processing
3686 * for congestion control or by the protocol layers. 3671 * for congestion control or by the protocol layers.
3687 * 3672 *
3688 * This function may only be called from softirq context and interrupts 3673 * This function may only be called from softirq context and interrupts
3689 * should be enabled. 3674 * should be enabled.
3690 * 3675 *
3691 * Return values (usually ignored): 3676 * Return values (usually ignored):
3692 * NET_RX_SUCCESS: no congestion 3677 * NET_RX_SUCCESS: no congestion
3693 * NET_RX_DROP: packet was dropped 3678 * NET_RX_DROP: packet was dropped
3694 */ 3679 */
3695 int netif_receive_skb(struct sk_buff *skb) 3680 int netif_receive_skb(struct sk_buff *skb)
3696 { 3681 {
3697 net_timestamp_check(netdev_tstamp_prequeue, skb); 3682 net_timestamp_check(netdev_tstamp_prequeue, skb);
3698 3683
3699 if (skb_defer_rx_timestamp(skb)) 3684 if (skb_defer_rx_timestamp(skb))
3700 return NET_RX_SUCCESS; 3685 return NET_RX_SUCCESS;
3701 3686
3702 #ifdef CONFIG_RPS 3687 #ifdef CONFIG_RPS
3703 if (static_key_false(&rps_needed)) { 3688 if (static_key_false(&rps_needed)) {
3704 struct rps_dev_flow voidflow, *rflow = &voidflow; 3689 struct rps_dev_flow voidflow, *rflow = &voidflow;
3705 int cpu, ret; 3690 int cpu, ret;
3706 3691
3707 rcu_read_lock(); 3692 rcu_read_lock();
3708 3693
3709 cpu = get_rps_cpu(skb->dev, skb, &rflow); 3694 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3710 3695
3711 if (cpu >= 0) { 3696 if (cpu >= 0) {
3712 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); 3697 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3713 rcu_read_unlock(); 3698 rcu_read_unlock();
3714 return ret; 3699 return ret;
3715 } 3700 }
3716 rcu_read_unlock(); 3701 rcu_read_unlock();
3717 } 3702 }
3718 #endif 3703 #endif
3719 return __netif_receive_skb(skb); 3704 return __netif_receive_skb(skb);
3720 } 3705 }
3721 EXPORT_SYMBOL(netif_receive_skb); 3706 EXPORT_SYMBOL(netif_receive_skb);
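/*
 * A sketch of the NAPI-side counterpart of netif_rx(): a driver poll routine
 * handing completed frames to netif_receive_skb() from softirq context, as
 * the kernel-doc above requires. example_ring_next_skb() and the example_
 * names are hypothetical; napi_complete(), eth_type_trans() and
 * netif_receive_skb() are the real APIs (usual includes assumed).
 */
static struct sk_buff *example_ring_next_skb(struct net_device *dev);	/* hypothetical RX ring helper */

static int example_napi_poll(struct napi_struct *napi, int budget)
{
	struct net_device *dev = napi->dev;
	int work_done = 0;

	while (work_done < budget) {
		struct sk_buff *skb = example_ring_next_skb(dev);

		if (!skb)
			break;

		skb->protocol = eth_type_trans(skb, dev);
		netif_receive_skb(skb);	/* or napi_gro_receive(napi, skb) to feed GRO */
		work_done++;
	}

	if (work_done < budget) {
		napi_complete(napi);
		/* ... re-enable RX interrupts on the device here ... */
	}

	return work_done;
}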
3722 3707
3723 /* Network device is going away, flush any packets still pending 3708 /* Network device is going away, flush any packets still pending
3724 * Called with irqs disabled. 3709 * Called with irqs disabled.
3725 */ 3710 */
3726 static void flush_backlog(void *arg) 3711 static void flush_backlog(void *arg)
3727 { 3712 {
3728 struct net_device *dev = arg; 3713 struct net_device *dev = arg;
3729 struct softnet_data *sd = &__get_cpu_var(softnet_data); 3714 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3730 struct sk_buff *skb, *tmp; 3715 struct sk_buff *skb, *tmp;
3731 3716
3732 rps_lock(sd); 3717 rps_lock(sd);
3733 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { 3718 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
3734 if (skb->dev == dev) { 3719 if (skb->dev == dev) {
3735 __skb_unlink(skb, &sd->input_pkt_queue); 3720 __skb_unlink(skb, &sd->input_pkt_queue);
3736 kfree_skb(skb); 3721 kfree_skb(skb);
3737 input_queue_head_incr(sd); 3722 input_queue_head_incr(sd);
3738 } 3723 }
3739 } 3724 }
3740 rps_unlock(sd); 3725 rps_unlock(sd);
3741 3726
3742 skb_queue_walk_safe(&sd->process_queue, skb, tmp) { 3727 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
3743 if (skb->dev == dev) { 3728 if (skb->dev == dev) {
3744 __skb_unlink(skb, &sd->process_queue); 3729 __skb_unlink(skb, &sd->process_queue);
3745 kfree_skb(skb); 3730 kfree_skb(skb);
3746 input_queue_head_incr(sd); 3731 input_queue_head_incr(sd);
3747 } 3732 }
3748 } 3733 }
3749 } 3734 }
3750 3735
3751 static int napi_gro_complete(struct sk_buff *skb) 3736 static int napi_gro_complete(struct sk_buff *skb)
3752 { 3737 {
3753 struct packet_offload *ptype; 3738 struct packet_offload *ptype;
3754 __be16 type = skb->protocol; 3739 __be16 type = skb->protocol;
3755 struct list_head *head = &offload_base; 3740 struct list_head *head = &offload_base;
3756 int err = -ENOENT; 3741 int err = -ENOENT;
3757 3742
3758 BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb)); 3743 BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
3759 3744
3760 if (NAPI_GRO_CB(skb)->count == 1) { 3745 if (NAPI_GRO_CB(skb)->count == 1) {
3761 skb_shinfo(skb)->gso_size = 0; 3746 skb_shinfo(skb)->gso_size = 0;
3762 goto out; 3747 goto out;
3763 } 3748 }
3764 3749
3765 rcu_read_lock(); 3750 rcu_read_lock();
3766 list_for_each_entry_rcu(ptype, head, list) { 3751 list_for_each_entry_rcu(ptype, head, list) {
3767 if (ptype->type != type || !ptype->callbacks.gro_complete) 3752 if (ptype->type != type || !ptype->callbacks.gro_complete)
3768 continue; 3753 continue;
3769 3754
3770 err = ptype->callbacks.gro_complete(skb); 3755 err = ptype->callbacks.gro_complete(skb);
3771 break; 3756 break;
3772 } 3757 }
3773 rcu_read_unlock(); 3758 rcu_read_unlock();
3774 3759
3775 if (err) { 3760 if (err) {
3776 WARN_ON(&ptype->list == head); 3761 WARN_ON(&ptype->list == head);
3777 kfree_skb(skb); 3762 kfree_skb(skb);
3778 return NET_RX_SUCCESS; 3763 return NET_RX_SUCCESS;
3779 } 3764 }
3780 3765
3781 out: 3766 out:
3782 return netif_receive_skb(skb); 3767 return netif_receive_skb(skb);
3783 } 3768 }
3784 3769
3785 /* napi->gro_list contains packets ordered by age. 3770 /* napi->gro_list contains packets ordered by age.
3786 * The youngest packets are at its head. 3771 * The youngest packets are at its head.
3787 * Complete skbs in reverse order to reduce latencies. 3772 * Complete skbs in reverse order to reduce latencies.
3788 */ 3773 */
3789 void napi_gro_flush(struct napi_struct *napi, bool flush_old) 3774 void napi_gro_flush(struct napi_struct *napi, bool flush_old)
3790 { 3775 {
3791 struct sk_buff *skb, *prev = NULL; 3776 struct sk_buff *skb, *prev = NULL;
3792 3777
3793 /* scan list and build reverse chain */ 3778 /* scan list and build reverse chain */
3794 for (skb = napi->gro_list; skb != NULL; skb = skb->next) { 3779 for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
3795 skb->prev = prev; 3780 skb->prev = prev;
3796 prev = skb; 3781 prev = skb;
3797 } 3782 }
3798 3783
3799 for (skb = prev; skb; skb = prev) { 3784 for (skb = prev; skb; skb = prev) {
3800 skb->next = NULL; 3785 skb->next = NULL;
3801 3786
3802 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) 3787 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
3803 return; 3788 return;
3804 3789
3805 prev = skb->prev; 3790 prev = skb->prev;
3806 napi_gro_complete(skb); 3791 napi_gro_complete(skb);
3807 napi->gro_count--; 3792 napi->gro_count--;
3808 } 3793 }
3809 3794
3810 napi->gro_list = NULL; 3795 napi->gro_list = NULL;
3811 } 3796 }
3812 EXPORT_SYMBOL(napi_gro_flush); 3797 EXPORT_SYMBOL(napi_gro_flush);
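
The two-pass walk in napi_gro_flush() above is a generic way to visit a singly linked list from tail to head without allocating: first thread a prev pointer through the list, then follow it backwards so the oldest entries are completed first. A minimal userspace sketch of the same idea follows; the struct node type and the printing are purely illustrative, not kernel code.

#include <stdio.h>

struct node {
	int val;
	struct node *next;
	struct node *prev;	/* scratch pointer, filled in by the walk */
};

/* Visit the list tail-to-head, mirroring the two passes in napi_gro_flush(). */
static void visit_in_reverse(struct node *head)
{
	struct node *n, *prev = NULL;

	/* pass 1: thread a reverse chain through ->prev */
	for (n = head; n; n = n->next) {
		n->prev = prev;
		prev = n;
	}

	/* pass 2: 'prev' is now the tail; walk back towards the head */
	for (n = prev; n; n = prev) {
		prev = n->prev;
		printf("%d\n", n->val);	/* the entry farthest from the head comes out first */
	}
}

int main(void)
{
	struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };

	visit_in_reverse(&a);		/* prints 3, 2, 1 */
	return 0;
}
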
3813 3798
3814 static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) 3799 static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
3815 { 3800 {
3816 struct sk_buff *p; 3801 struct sk_buff *p;
3817 unsigned int maclen = skb->dev->hard_header_len; 3802 unsigned int maclen = skb->dev->hard_header_len;
3818 3803
3819 for (p = napi->gro_list; p; p = p->next) { 3804 for (p = napi->gro_list; p; p = p->next) {
3820 unsigned long diffs; 3805 unsigned long diffs;
3821 3806
3822 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; 3807 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3823 diffs |= p->vlan_tci ^ skb->vlan_tci; 3808 diffs |= p->vlan_tci ^ skb->vlan_tci;
3824 if (maclen == ETH_HLEN) 3809 if (maclen == ETH_HLEN)
3825 diffs |= compare_ether_header(skb_mac_header(p), 3810 diffs |= compare_ether_header(skb_mac_header(p),
3826 skb_gro_mac_header(skb)); 3811 skb_gro_mac_header(skb));
3827 else if (!diffs) 3812 else if (!diffs)
3828 diffs = memcmp(skb_mac_header(p), 3813 diffs = memcmp(skb_mac_header(p),
3829 skb_gro_mac_header(skb), 3814 skb_gro_mac_header(skb),
3830 maclen); 3815 maclen);
3831 NAPI_GRO_CB(p)->same_flow = !diffs; 3816 NAPI_GRO_CB(p)->same_flow = !diffs;
3832 NAPI_GRO_CB(p)->flush = 0; 3817 NAPI_GRO_CB(p)->flush = 0;
3833 } 3818 }
3834 } 3819 }
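
gro_list_prepare() classifies a held packet as belonging to the same flow by accumulating every possible difference into one value and checking that it stays zero: the device pointers are XOR-ed, the VLAN tags are XOR-ed, and the MAC headers are compared bytewise. Below is a hedged standalone sketch of that accumulate-the-differences idiom; struct flow_key and its fields are invented for the example and are not kernel structures.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAC_HDR_LEN 12	/* destination + source MAC, as compared by GRO */

/* Stand-in for the handful of fields gro_list_prepare() compares per packet. */
struct flow_key {
	const void *dev;			/* receiving device identity */
	uint16_t vlan_tci;
	unsigned char mac[MAC_HDR_LEN];
};

/* Returns nonzero when a and b belong to the same flow. */
static int same_flow(const struct flow_key *a, const struct flow_key *b)
{
	unsigned long diffs;

	diffs  = (unsigned long)a->dev ^ (unsigned long)b->dev;
	diffs |= a->vlan_tci ^ b->vlan_tci;
	diffs |= !!memcmp(a->mac, b->mac, sizeof(a->mac));

	return !diffs;		/* zero accumulated difference means same flow */
}

int main(void)
{
	static int dev;				/* any unique address works as a device id */
	struct flow_key a = { &dev, 0, { 2, 0, 0, 0, 0, 1,
					 2, 0, 0, 0, 0, 2 } };
	struct flow_key b = a;

	printf("%d\n", same_flow(&a, &b));	/* 1: identical keys */
	b.vlan_tci = 42;
	printf("%d\n", same_flow(&a, &b));	/* 0: VLAN tag differs */
	return 0;
}
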
3835 3820
3836 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 3821 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3837 { 3822 {
3838 struct sk_buff **pp = NULL; 3823 struct sk_buff **pp = NULL;
3839 struct packet_offload *ptype; 3824 struct packet_offload *ptype;
3840 __be16 type = skb->protocol; 3825 __be16 type = skb->protocol;
3841 struct list_head *head = &offload_base; 3826 struct list_head *head = &offload_base;
3842 int same_flow; 3827 int same_flow;
3843 enum gro_result ret; 3828 enum gro_result ret;
3844 3829
3845 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) 3830 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
3846 goto normal; 3831 goto normal;
3847 3832
3848 if (skb_is_gso(skb) || skb_has_frag_list(skb)) 3833 if (skb_is_gso(skb) || skb_has_frag_list(skb))
3849 goto normal; 3834 goto normal;
3850 3835
3851 gro_list_prepare(napi, skb); 3836 gro_list_prepare(napi, skb);
3852 3837
3853 rcu_read_lock(); 3838 rcu_read_lock();
3854 list_for_each_entry_rcu(ptype, head, list) { 3839 list_for_each_entry_rcu(ptype, head, list) {
3855 if (ptype->type != type || !ptype->callbacks.gro_receive) 3840 if (ptype->type != type || !ptype->callbacks.gro_receive)
3856 continue; 3841 continue;
3857 3842
3858 skb_set_network_header(skb, skb_gro_offset(skb)); 3843 skb_set_network_header(skb, skb_gro_offset(skb));
3859 skb_reset_mac_len(skb); 3844 skb_reset_mac_len(skb);
3860 NAPI_GRO_CB(skb)->same_flow = 0; 3845 NAPI_GRO_CB(skb)->same_flow = 0;
3861 NAPI_GRO_CB(skb)->flush = 0; 3846 NAPI_GRO_CB(skb)->flush = 0;
3862 NAPI_GRO_CB(skb)->free = 0; 3847 NAPI_GRO_CB(skb)->free = 0;
3863 3848
3864 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); 3849 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
3865 break; 3850 break;
3866 } 3851 }
3867 rcu_read_unlock(); 3852 rcu_read_unlock();
3868 3853
3869 if (&ptype->list == head) 3854 if (&ptype->list == head)
3870 goto normal; 3855 goto normal;
3871 3856
3872 same_flow = NAPI_GRO_CB(skb)->same_flow; 3857 same_flow = NAPI_GRO_CB(skb)->same_flow;
3873 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; 3858 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
3874 3859
3875 if (pp) { 3860 if (pp) {
3876 struct sk_buff *nskb = *pp; 3861 struct sk_buff *nskb = *pp;
3877 3862
3878 *pp = nskb->next; 3863 *pp = nskb->next;
3879 nskb->next = NULL; 3864 nskb->next = NULL;
3880 napi_gro_complete(nskb); 3865 napi_gro_complete(nskb);
3881 napi->gro_count--; 3866 napi->gro_count--;
3882 } 3867 }
3883 3868
3884 if (same_flow) 3869 if (same_flow)
3885 goto ok; 3870 goto ok;
3886 3871
3887 if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS) 3872 if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
3888 goto normal; 3873 goto normal;
3889 3874
3890 napi->gro_count++; 3875 napi->gro_count++;
3891 NAPI_GRO_CB(skb)->count = 1; 3876 NAPI_GRO_CB(skb)->count = 1;
3892 NAPI_GRO_CB(skb)->age = jiffies; 3877 NAPI_GRO_CB(skb)->age = jiffies;
3893 skb_shinfo(skb)->gso_size = skb_gro_len(skb); 3878 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
3894 skb->next = napi->gro_list; 3879 skb->next = napi->gro_list;
3895 napi->gro_list = skb; 3880 napi->gro_list = skb;
3896 ret = GRO_HELD; 3881 ret = GRO_HELD;
3897 3882
3898 pull: 3883 pull:
3899 if (skb_headlen(skb) < skb_gro_offset(skb)) { 3884 if (skb_headlen(skb) < skb_gro_offset(skb)) {
3900 int grow = skb_gro_offset(skb) - skb_headlen(skb); 3885 int grow = skb_gro_offset(skb) - skb_headlen(skb);
3901 3886
3902 BUG_ON(skb->end - skb->tail < grow); 3887 BUG_ON(skb->end - skb->tail < grow);
3903 3888
3904 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow); 3889 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
3905 3890
3906 skb->tail += grow; 3891 skb->tail += grow;
3907 skb->data_len -= grow; 3892 skb->data_len -= grow;
3908 3893
3909 skb_shinfo(skb)->frags[0].page_offset += grow; 3894 skb_shinfo(skb)->frags[0].page_offset += grow;
3910 skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow); 3895 skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
3911 3896
3912 if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) { 3897 if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
3913 skb_frag_unref(skb, 0); 3898 skb_frag_unref(skb, 0);
3914 memmove(skb_shinfo(skb)->frags, 3899 memmove(skb_shinfo(skb)->frags,
3915 skb_shinfo(skb)->frags + 1, 3900 skb_shinfo(skb)->frags + 1,
3916 --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); 3901 --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
3917 } 3902 }
3918 } 3903 }
3919 3904
3920 ok: 3905 ok:
3921 return ret; 3906 return ret;
3922 3907
3923 normal: 3908 normal:
3924 ret = GRO_NORMAL; 3909 ret = GRO_NORMAL;
3925 goto pull; 3910 goto pull;
3926 } 3911 }
3927 3912
3928 3913
3929 static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) 3914 static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
3930 { 3915 {
3931 switch (ret) { 3916 switch (ret) {
3932 case GRO_NORMAL: 3917 case GRO_NORMAL:
3933 if (netif_receive_skb(skb)) 3918 if (netif_receive_skb(skb))
3934 ret = GRO_DROP; 3919 ret = GRO_DROP;
3935 break; 3920 break;
3936 3921
3937 case GRO_DROP: 3922 case GRO_DROP:
3938 kfree_skb(skb); 3923 kfree_skb(skb);
3939 break; 3924 break;
3940 3925
3941 case GRO_MERGED_FREE: 3926 case GRO_MERGED_FREE:
3942 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) 3927 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
3943 kmem_cache_free(skbuff_head_cache, skb); 3928 kmem_cache_free(skbuff_head_cache, skb);
3944 else 3929 else
3945 __kfree_skb(skb); 3930 __kfree_skb(skb);
3946 break; 3931 break;
3947 3932
3948 case GRO_HELD: 3933 case GRO_HELD:
3949 case GRO_MERGED: 3934 case GRO_MERGED:
3950 break; 3935 break;
3951 } 3936 }
3952 3937
3953 return ret; 3938 return ret;
3954 } 3939 }
3955 3940
3956 static void skb_gro_reset_offset(struct sk_buff *skb) 3941 static void skb_gro_reset_offset(struct sk_buff *skb)
3957 { 3942 {
3958 const struct skb_shared_info *pinfo = skb_shinfo(skb); 3943 const struct skb_shared_info *pinfo = skb_shinfo(skb);
3959 const skb_frag_t *frag0 = &pinfo->frags[0]; 3944 const skb_frag_t *frag0 = &pinfo->frags[0];
3960 3945
3961 NAPI_GRO_CB(skb)->data_offset = 0; 3946 NAPI_GRO_CB(skb)->data_offset = 0;
3962 NAPI_GRO_CB(skb)->frag0 = NULL; 3947 NAPI_GRO_CB(skb)->frag0 = NULL;
3963 NAPI_GRO_CB(skb)->frag0_len = 0; 3948 NAPI_GRO_CB(skb)->frag0_len = 0;
3964 3949
3965 if (skb_mac_header(skb) == skb_tail_pointer(skb) && 3950 if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
3966 pinfo->nr_frags && 3951 pinfo->nr_frags &&
3967 !PageHighMem(skb_frag_page(frag0))) { 3952 !PageHighMem(skb_frag_page(frag0))) {
3968 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); 3953 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
3969 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); 3954 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
3970 } 3955 }
3971 } 3956 }
3972 3957
3973 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 3958 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3974 { 3959 {
3975 skb_gro_reset_offset(skb); 3960 skb_gro_reset_offset(skb);
3976 3961
3977 return napi_skb_finish(dev_gro_receive(napi, skb), skb); 3962 return napi_skb_finish(dev_gro_receive(napi, skb), skb);
3978 } 3963 }
3979 EXPORT_SYMBOL(napi_gro_receive); 3964 EXPORT_SYMBOL(napi_gro_receive);
3980 3965
3981 static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) 3966 static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
3982 { 3967 {
3983 __skb_pull(skb, skb_headlen(skb)); 3968 __skb_pull(skb, skb_headlen(skb));
3984 /* restore the reserve we had after netdev_alloc_skb_ip_align() */ 3969 /* restore the reserve we had after netdev_alloc_skb_ip_align() */
3985 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); 3970 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
3986 skb->vlan_tci = 0; 3971 skb->vlan_tci = 0;
3987 skb->dev = napi->dev; 3972 skb->dev = napi->dev;
3988 skb->skb_iif = 0; 3973 skb->skb_iif = 0;
3989 3974
3990 napi->skb = skb; 3975 napi->skb = skb;
3991 } 3976 }
3992 3977
3993 struct sk_buff *napi_get_frags(struct napi_struct *napi) 3978 struct sk_buff *napi_get_frags(struct napi_struct *napi)
3994 { 3979 {
3995 struct sk_buff *skb = napi->skb; 3980 struct sk_buff *skb = napi->skb;
3996 3981
3997 if (!skb) { 3982 if (!skb) {
3998 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD); 3983 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
3999 napi->skb = skb; 3984 napi->skb = skb;
4000 } 3985 }
4001 return skb; 3986 return skb;
4002 } 3987 }
4003 EXPORT_SYMBOL(napi_get_frags); 3988 EXPORT_SYMBOL(napi_get_frags);
4004 3989
4005 static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, 3990 static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
4006 gro_result_t ret) 3991 gro_result_t ret)
4007 { 3992 {
4008 switch (ret) { 3993 switch (ret) {
4009 case GRO_NORMAL: 3994 case GRO_NORMAL:
4010 case GRO_HELD: 3995 case GRO_HELD:
4011 skb->protocol = eth_type_trans(skb, skb->dev); 3996 skb->protocol = eth_type_trans(skb, skb->dev);
4012 3997
4013 if (ret == GRO_HELD) 3998 if (ret == GRO_HELD)
4014 skb_gro_pull(skb, -ETH_HLEN); 3999 skb_gro_pull(skb, -ETH_HLEN);
4015 else if (netif_receive_skb(skb)) 4000 else if (netif_receive_skb(skb))
4016 ret = GRO_DROP; 4001 ret = GRO_DROP;
4017 break; 4002 break;
4018 4003
4019 case GRO_DROP: 4004 case GRO_DROP:
4020 case GRO_MERGED_FREE: 4005 case GRO_MERGED_FREE:
4021 napi_reuse_skb(napi, skb); 4006 napi_reuse_skb(napi, skb);
4022 break; 4007 break;
4023 4008
4024 case GRO_MERGED: 4009 case GRO_MERGED:
4025 break; 4010 break;
4026 } 4011 }
4027 4012
4028 return ret; 4013 return ret;
4029 } 4014 }
4030 4015
4031 static struct sk_buff *napi_frags_skb(struct napi_struct *napi) 4016 static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
4032 { 4017 {
4033 struct sk_buff *skb = napi->skb; 4018 struct sk_buff *skb = napi->skb;
4034 struct ethhdr *eth; 4019 struct ethhdr *eth;
4035 unsigned int hlen; 4020 unsigned int hlen;
4036 unsigned int off; 4021 unsigned int off;
4037 4022
4038 napi->skb = NULL; 4023 napi->skb = NULL;
4039 4024
4040 skb_reset_mac_header(skb); 4025 skb_reset_mac_header(skb);
4041 skb_gro_reset_offset(skb); 4026 skb_gro_reset_offset(skb);
4042 4027
4043 off = skb_gro_offset(skb); 4028 off = skb_gro_offset(skb);
4044 hlen = off + sizeof(*eth); 4029 hlen = off + sizeof(*eth);
4045 eth = skb_gro_header_fast(skb, off); 4030 eth = skb_gro_header_fast(skb, off);
4046 if (skb_gro_header_hard(skb, hlen)) { 4031 if (skb_gro_header_hard(skb, hlen)) {
4047 eth = skb_gro_header_slow(skb, hlen, off); 4032 eth = skb_gro_header_slow(skb, hlen, off);
4048 if (unlikely(!eth)) { 4033 if (unlikely(!eth)) {
4049 napi_reuse_skb(napi, skb); 4034 napi_reuse_skb(napi, skb);
4050 skb = NULL; 4035 skb = NULL;
4051 goto out; 4036 goto out;
4052 } 4037 }
4053 } 4038 }
4054 4039
4055 skb_gro_pull(skb, sizeof(*eth)); 4040 skb_gro_pull(skb, sizeof(*eth));
4056 4041
4057 /* 4042 /*
4058 * This works because the only protocols we care about don't require 4043 * This works because the only protocols we care about don't require
4059 * special handling. We'll fix it up properly at the end. 4044 * special handling. We'll fix it up properly at the end.
4060 */ 4045 */
4061 skb->protocol = eth->h_proto; 4046 skb->protocol = eth->h_proto;
4062 4047
4063 out: 4048 out:
4064 return skb; 4049 return skb;
4065 } 4050 }
4066 4051
4067 gro_result_t napi_gro_frags(struct napi_struct *napi) 4052 gro_result_t napi_gro_frags(struct napi_struct *napi)
4068 { 4053 {
4069 struct sk_buff *skb = napi_frags_skb(napi); 4054 struct sk_buff *skb = napi_frags_skb(napi);
4070 4055
4071 if (!skb) 4056 if (!skb)
4072 return GRO_DROP; 4057 return GRO_DROP;
4073 4058
4074 return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); 4059 return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
4075 } 4060 }
4076 EXPORT_SYMBOL(napi_gro_frags); 4061 EXPORT_SYMBOL(napi_gro_frags);
4077 4062
4078 /* 4063 /*
4079 * net_rps_action sends any pending IPIs for RPS. 4064 * net_rps_action sends any pending IPIs for RPS.
4080 * Note: called with local irq disabled, but exits with local irq enabled. 4065 * Note: called with local irq disabled, but exits with local irq enabled.
4081 */ 4066 */
4082 static void net_rps_action_and_irq_enable(struct softnet_data *sd) 4067 static void net_rps_action_and_irq_enable(struct softnet_data *sd)
4083 { 4068 {
4084 #ifdef CONFIG_RPS 4069 #ifdef CONFIG_RPS
4085 struct softnet_data *remsd = sd->rps_ipi_list; 4070 struct softnet_data *remsd = sd->rps_ipi_list;
4086 4071
4087 if (remsd) { 4072 if (remsd) {
4088 sd->rps_ipi_list = NULL; 4073 sd->rps_ipi_list = NULL;
4089 4074
4090 local_irq_enable(); 4075 local_irq_enable();
4091 4076
4092 /* Send pending IPIs to kick RPS processing on remote CPUs. */ 4077 /* Send pending IPIs to kick RPS processing on remote CPUs. */
4093 while (remsd) { 4078 while (remsd) {
4094 struct softnet_data *next = remsd->rps_ipi_next; 4079 struct softnet_data *next = remsd->rps_ipi_next;
4095 4080
4096 if (cpu_online(remsd->cpu)) 4081 if (cpu_online(remsd->cpu))
4097 __smp_call_function_single(remsd->cpu, 4082 __smp_call_function_single(remsd->cpu,
4098 &remsd->csd, 0); 4083 &remsd->csd, 0);
4099 remsd = next; 4084 remsd = next;
4100 } 4085 }
4101 } else 4086 } else
4102 #endif 4087 #endif
4103 local_irq_enable(); 4088 local_irq_enable();
4104 } 4089 }
4105 4090
4106 static int process_backlog(struct napi_struct *napi, int quota) 4091 static int process_backlog(struct napi_struct *napi, int quota)
4107 { 4092 {
4108 int work = 0; 4093 int work = 0;
4109 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog); 4094 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
4110 4095
4111 #ifdef CONFIG_RPS 4096 #ifdef CONFIG_RPS
4112 /* Check if we have pending IPIs; it's better to send them now 4097 /* Check if we have pending IPIs; it's better to send them now
4113 * than to wait for net_rx_action() to end. 4098 * than to wait for net_rx_action() to end.
4114 */ 4099 */
4115 if (sd->rps_ipi_list) { 4100 if (sd->rps_ipi_list) {
4116 local_irq_disable(); 4101 local_irq_disable();
4117 net_rps_action_and_irq_enable(sd); 4102 net_rps_action_and_irq_enable(sd);
4118 } 4103 }
4119 #endif 4104 #endif
4120 napi->weight = weight_p; 4105 napi->weight = weight_p;
4121 local_irq_disable(); 4106 local_irq_disable();
4122 while (work < quota) { 4107 while (work < quota) {
4123 struct sk_buff *skb; 4108 struct sk_buff *skb;
4124 unsigned int qlen; 4109 unsigned int qlen;
4125 4110
4126 while ((skb = __skb_dequeue(&sd->process_queue))) { 4111 while ((skb = __skb_dequeue(&sd->process_queue))) {
4127 local_irq_enable(); 4112 local_irq_enable();
4128 __netif_receive_skb(skb); 4113 __netif_receive_skb(skb);
4129 local_irq_disable(); 4114 local_irq_disable();
4130 input_queue_head_incr(sd); 4115 input_queue_head_incr(sd);
4131 if (++work >= quota) { 4116 if (++work >= quota) {
4132 local_irq_enable(); 4117 local_irq_enable();
4133 return work; 4118 return work;
4134 } 4119 }
4135 } 4120 }
4136 4121
4137 rps_lock(sd); 4122 rps_lock(sd);
4138 qlen = skb_queue_len(&sd->input_pkt_queue); 4123 qlen = skb_queue_len(&sd->input_pkt_queue);
4139 if (qlen) 4124 if (qlen)
4140 skb_queue_splice_tail_init(&sd->input_pkt_queue, 4125 skb_queue_splice_tail_init(&sd->input_pkt_queue,
4141 &sd->process_queue); 4126 &sd->process_queue);
4142 4127
4143 if (qlen < quota - work) { 4128 if (qlen < quota - work) {
4144 /* 4129 /*
4145 * Inline a custom version of __napi_complete(). 4130 * Inline a custom version of __napi_complete().
4146 * Only the current CPU owns and manipulates this napi, 4131 * Only the current CPU owns and manipulates this napi,
4147 * and NAPI_STATE_SCHED is the only possible flag set on backlog. 4132 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
4148 * We can therefore use a plain write instead of clear_bit(), 4133 * We can therefore use a plain write instead of clear_bit(),
4149 * and we don't need an smp_mb() memory barrier. 4134 * and we don't need an smp_mb() memory barrier.
4150 */ 4135 */
4151 list_del(&napi->poll_list); 4136 list_del(&napi->poll_list);
4152 napi->state = 0; 4137 napi->state = 0;
4153 4138
4154 quota = work + qlen; 4139 quota = work + qlen;
4155 } 4140 }
4156 rps_unlock(sd); 4141 rps_unlock(sd);
4157 } 4142 }
4158 local_irq_enable(); 4143 local_irq_enable();
4159 4144
4160 return work; 4145 return work;
4161 } 4146 }
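
process_backlog() keeps the rps_lock() hold time short by splicing the whole input_pkt_queue onto the private process_queue in one step and then draining that private queue with the lock dropped. Here is the same splice-then-drain pattern in a small userspace sketch, with a pthread mutex standing in for rps_lock(); the item type and queue names are invented for illustration.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
	int id;
	struct item *next;
};

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *input_queue;	/* filled by producers, guarded by queue_lock */

/* Producer side: push one item under the lock. */
static void enqueue(int id)
{
	struct item *it = malloc(sizeof(*it));

	if (!it)
		return;
	it->id = id;
	pthread_mutex_lock(&queue_lock);
	it->next = input_queue;
	input_queue = it;
	pthread_mutex_unlock(&queue_lock);
}

/* Consumer side: splice everything out under the lock, process it unlocked. */
static void drain(void)
{
	struct item *batch, *it;

	pthread_mutex_lock(&queue_lock);
	batch = input_queue;		/* take the whole queue in O(1) */
	input_queue = NULL;
	pthread_mutex_unlock(&queue_lock);

	while ((it = batch)) {		/* no lock held while the items are processed */
		batch = it->next;
		printf("processing item %d\n", it->id);
		free(it);
	}
}

int main(void)
{
	enqueue(1);
	enqueue(2);
	enqueue(3);
	drain();			/* prints 3, 2, 1 (items were pushed LIFO) */
	return 0;
}
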
4162 4147
4163 /** 4148 /**
4164 * __napi_schedule - schedule for receive 4149 * __napi_schedule - schedule for receive
4165 * @n: entry to schedule 4150 * @n: entry to schedule
4166 * 4151 *
4167 * The entry's receive function will be scheduled to run 4152 * The entry's receive function will be scheduled to run
4168 */ 4153 */
4169 void __napi_schedule(struct napi_struct *n) 4154 void __napi_schedule(struct napi_struct *n)
4170 { 4155 {
4171 unsigned long flags; 4156 unsigned long flags;
4172 4157
4173 local_irq_save(flags); 4158 local_irq_save(flags);
4174 ____napi_schedule(&__get_cpu_var(softnet_data), n); 4159 ____napi_schedule(&__get_cpu_var(softnet_data), n);
4175 local_irq_restore(flags); 4160 local_irq_restore(flags);
4176 } 4161 }
4177 EXPORT_SYMBOL(__napi_schedule); 4162 EXPORT_SYMBOL(__napi_schedule);
4178 4163
4179 void __napi_complete(struct napi_struct *n) 4164 void __napi_complete(struct napi_struct *n)
4180 { 4165 {
4181 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); 4166 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
4182 BUG_ON(n->gro_list); 4167 BUG_ON(n->gro_list);
4183 4168
4184 list_del(&n->poll_list); 4169 list_del(&n->poll_list);
4185 smp_mb__before_clear_bit(); 4170 smp_mb__before_clear_bit();
4186 clear_bit(NAPI_STATE_SCHED, &n->state); 4171 clear_bit(NAPI_STATE_SCHED, &n->state);
4187 } 4172 }
4188 EXPORT_SYMBOL(__napi_complete); 4173 EXPORT_SYMBOL(__napi_complete);
4189 4174
4190 void napi_complete(struct napi_struct *n) 4175 void napi_complete(struct napi_struct *n)
4191 { 4176 {
4192 unsigned long flags; 4177 unsigned long flags;
4193 4178
4194 /* 4179 /*
4195 * don't let napi dequeue from the CPU poll list 4180 * don't let napi dequeue from the CPU poll list
4196 * just in case it's running on a different CPU 4181 * just in case it's running on a different CPU
4197 */ 4182 */
4198 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) 4183 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
4199 return; 4184 return;
4200 4185
4201 napi_gro_flush(n, false); 4186 napi_gro_flush(n, false);
4202 local_irq_save(flags); 4187 local_irq_save(flags);
4203 __napi_complete(n); 4188 __napi_complete(n);
4204 local_irq_restore(flags); 4189 local_irq_restore(flags);
4205 } 4190 }
4206 EXPORT_SYMBOL(napi_complete); 4191 EXPORT_SYMBOL(napi_complete);
4207 4192
4208 /* must be called under rcu_read_lock(), as we don't take a reference */ 4193 /* must be called under rcu_read_lock(), as we don't take a reference */
4209 struct napi_struct *napi_by_id(unsigned int napi_id) 4194 struct napi_struct *napi_by_id(unsigned int napi_id)
4210 { 4195 {
4211 unsigned int hash = napi_id % HASH_SIZE(napi_hash); 4196 unsigned int hash = napi_id % HASH_SIZE(napi_hash);
4212 struct napi_struct *napi; 4197 struct napi_struct *napi;
4213 4198
4214 hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node) 4199 hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
4215 if (napi->napi_id == napi_id) 4200 if (napi->napi_id == napi_id)
4216 return napi; 4201 return napi;
4217 4202
4218 return NULL; 4203 return NULL;
4219 } 4204 }
4220 EXPORT_SYMBOL_GPL(napi_by_id); 4205 EXPORT_SYMBOL_GPL(napi_by_id);
4221 4206
4222 void napi_hash_add(struct napi_struct *napi) 4207 void napi_hash_add(struct napi_struct *napi)
4223 { 4208 {
4224 if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) { 4209 if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) {
4225 4210
4226 spin_lock(&napi_hash_lock); 4211 spin_lock(&napi_hash_lock);
4227 4212
4228 /* 0 is not a valid id; we also skip an id that is already taken. 4213 /* 0 is not a valid id; we also skip an id that is already taken.
4229 * We expect both events to be extremely rare. 4214 * We expect both events to be extremely rare.
4230 */ 4215 */
4231 napi->napi_id = 0; 4216 napi->napi_id = 0;
4232 while (!napi->napi_id) { 4217 while (!napi->napi_id) {
4233 napi->napi_id = ++napi_gen_id; 4218 napi->napi_id = ++napi_gen_id;
4234 if (napi_by_id(napi->napi_id)) 4219 if (napi_by_id(napi->napi_id))
4235 napi->napi_id = 0; 4220 napi->napi_id = 0;
4236 } 4221 }
4237 4222
4238 hlist_add_head_rcu(&napi->napi_hash_node, 4223 hlist_add_head_rcu(&napi->napi_hash_node,
4239 &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); 4224 &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
4240 4225
4241 spin_unlock(&napi_hash_lock); 4226 spin_unlock(&napi_hash_lock);
4242 } 4227 }
4243 } 4228 }
4244 EXPORT_SYMBOL_GPL(napi_hash_add); 4229 EXPORT_SYMBOL_GPL(napi_hash_add);
4245 4230
4246 /* Warning: the caller is responsible for making sure an RCU grace period 4231 /* Warning: the caller is responsible for making sure an RCU grace period
4247 * is respected before freeing the memory containing @napi 4232 * is respected before freeing the memory containing @napi
4248 */ 4233 */
4249 void napi_hash_del(struct napi_struct *napi) 4234 void napi_hash_del(struct napi_struct *napi)
4250 { 4235 {
4251 spin_lock(&napi_hash_lock); 4236 spin_lock(&napi_hash_lock);
4252 4237
4253 if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) 4238 if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state))
4254 hlist_del_rcu(&napi->napi_hash_node); 4239 hlist_del_rcu(&napi->napi_hash_node);
4255 4240
4256 spin_unlock(&napi_hash_lock); 4241 spin_unlock(&napi_hash_lock);
4257 } 4242 }
4258 EXPORT_SYMBOL_GPL(napi_hash_del); 4243 EXPORT_SYMBOL_GPL(napi_hash_del);
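
The id allocation loop in napi_hash_add() is a generation counter: bump napi_gen_id, treat 0 as "no id" so wraparound can never hand it out, and retry while the candidate already matches an existing entry. A compact sketch of that loop against a deliberately tiny table follows; the array, its size and alloc_id() are illustrative simplifications, not kernel API.

#include <stdbool.h>
#include <stdio.h>

#define NR_IDS 64	/* deliberately tiny; the real hash is larger and keyed per id */

static bool id_taken[NR_IDS];
static unsigned int gen_id;	/* monotonically increasing generator, cf. napi_gen_id */

/* Mirror of the loop in napi_hash_add(): skip 0 and skip ids already in use. */
static unsigned int alloc_id(void)
{
	unsigned int id = 0;

	while (!id) {
		id = ++gen_id;
		if (id_taken[id % NR_IDS])
			id = 0;			/* collision: try the next candidate */
	}
	id_taken[id % NR_IDS] = true;
	return id;
}

int main(void)
{
	unsigned int a = alloc_id();
	unsigned int b = alloc_id();

	printf("%u %u\n", a, b);		/* 1 2 */
	return 0;
}
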
4259 4244
4260 void netif_napi_add(struct net_device *dev, struct napi_struct *napi, 4245 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
4261 int (*poll)(struct napi_struct *, int), int weight) 4246 int (*poll)(struct napi_struct *, int), int weight)
4262 { 4247 {
4263 INIT_LIST_HEAD(&napi->poll_list); 4248 INIT_LIST_HEAD(&napi->poll_list);
4264 napi->gro_count = 0; 4249 napi->gro_count = 0;
4265 napi->gro_list = NULL; 4250 napi->gro_list = NULL;
4266 napi->skb = NULL; 4251 napi->skb = NULL;
4267 napi->poll = poll; 4252 napi->poll = poll;
4268 if (weight > NAPI_POLL_WEIGHT) 4253 if (weight > NAPI_POLL_WEIGHT)
4269 pr_err_once("netif_napi_add() called with weight %d on device %s\n", 4254 pr_err_once("netif_napi_add() called with weight %d on device %s\n",
4270 weight, dev->name); 4255 weight, dev->name);
4271 napi->weight = weight; 4256 napi->weight = weight;
4272 list_add(&napi->dev_list, &dev->napi_list); 4257 list_add(&napi->dev_list, &dev->napi_list);
4273 napi->dev = dev; 4258 napi->dev = dev;
4274 #ifdef CONFIG_NETPOLL 4259 #ifdef CONFIG_NETPOLL
4275 spin_lock_init(&napi->poll_lock); 4260 spin_lock_init(&napi->poll_lock);
4276 napi->poll_owner = -1; 4261 napi->poll_owner = -1;
4277 #endif 4262 #endif
4278 set_bit(NAPI_STATE_SCHED, &napi->state); 4263 set_bit(NAPI_STATE_SCHED, &napi->state);
4279 } 4264 }
4280 EXPORT_SYMBOL(netif_napi_add); 4265 EXPORT_SYMBOL(netif_napi_add);
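
For context, a driver typically uses the functions above in a fixed shape: netif_napi_add() at setup time, napi_schedule() from the RX interrupt, and a poll callback that feeds frames to napi_gro_receive() and calls napi_complete() once it finishes under budget. The skeleton below sketches that pattern for a hypothetical adapter; struct my_adapter, my_rx_one() and the my_irq_* helpers are placeholders for driver-specific code, and their stubs exist only to keep the sketch self-contained.

#include <linux/netdevice.h>
#include <linux/interrupt.h>
#include <linux/skbuff.h>

/* Hypothetical adapter; only the pieces relevant to NAPI are shown. */
struct my_adapter {
	struct net_device *netdev;
	struct napi_struct napi;
};

/* Placeholder stubs standing in for real ring and IRQ handling. */
static struct sk_buff *my_rx_one(struct my_adapter *adap) { return NULL; }
static void my_irq_enable(struct my_adapter *adap) { }
static void my_irq_disable(struct my_adapter *adap) { }

static int my_poll(struct napi_struct *napi, int budget)
{
	struct my_adapter *adap = container_of(napi, struct my_adapter, napi);
	int work = 0;

	while (work < budget) {
		struct sk_buff *skb = my_rx_one(adap);	/* next frame, if any */

		if (!skb)
			break;
		napi_gro_receive(napi, skb);	/* GRO, then netif_receive_skb() */
		work++;
	}

	if (work < budget) {
		napi_complete(napi);		/* ring drained: leave polled mode */
		my_irq_enable(adap);		/* next packet raises an interrupt again */
	}
	return work;
}

static irqreturn_t my_isr(int irq, void *data)
{
	struct my_adapter *adap = data;

	my_irq_disable(adap);			/* mask RX interrupts ... */
	napi_schedule(&adap->napi);		/* ... and switch to polling */
	return IRQ_HANDLED;
}

static void my_napi_setup(struct my_adapter *adap)
{
	netif_napi_add(adap->netdev, &adap->napi, my_poll, NAPI_POLL_WEIGHT);
	napi_enable(&adap->napi);
}
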
4281 4266
4282 void netif_napi_del(struct napi_struct *napi) 4267 void netif_napi_del(struct napi_struct *napi)
4283 { 4268 {
4284 struct sk_buff *skb, *next; 4269 struct sk_buff *skb, *next;
4285 4270
4286 list_del_init(&napi->dev_list); 4271 list_del_init(&napi->dev_list);
4287 napi_free_frags(napi); 4272 napi_free_frags(napi);
4288 4273
4289 for (skb = napi->gro_list; skb; skb = next) { 4274 for (skb = napi->gro_list; skb; skb = next) {
4290 next = skb->next; 4275 next = skb->next;
4291 skb->next = NULL; 4276 skb->next = NULL;
4292 kfree_skb(skb); 4277 kfree_skb(skb);
4293 } 4278 }
4294 4279
4295 napi->gro_list = NULL; 4280 napi->gro_list = NULL;
4296 napi->gro_count = 0; 4281 napi->gro_count = 0;
4297 } 4282 }
4298 EXPORT_SYMBOL(netif_napi_del); 4283 EXPORT_SYMBOL(netif_napi_del);
4299 4284
4300 static void net_rx_action(struct softirq_action *h) 4285 static void net_rx_action(struct softirq_action *h)
4301 { 4286 {
4302 struct softnet_data *sd = &__get_cpu_var(softnet_data); 4287 struct softnet_data *sd = &__get_cpu_var(softnet_data);
4303 unsigned long time_limit = jiffies + 2; 4288 unsigned long time_limit = jiffies + 2;
4304 int budget = netdev_budget; 4289 int budget = netdev_budget;
4305 void *have; 4290 void *have;
4306 4291
4307 local_irq_disable(); 4292 local_irq_disable();
4308 4293
4309 while (!list_empty(&sd->poll_list)) { 4294 while (!list_empty(&sd->poll_list)) {
4310 struct napi_struct *n; 4295 struct napi_struct *n;
4311 int work, weight; 4296 int work, weight;
4312 4297
4313 /* If the softirq window is exhausted then punt. 4298 /* If the softirq window is exhausted then punt.
4314 * Allow this to run for 2 jiffies, which will allow 4299 * Allow this to run for 2 jiffies, which will allow
4315 * an average latency of 1.5/HZ. 4300 * an average latency of 1.5/HZ.
4316 */ 4301 */
4317 if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit))) 4302 if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
4318 goto softnet_break; 4303 goto softnet_break;
4319 4304
4320 local_irq_enable(); 4305 local_irq_enable();
4321 4306
4322 /* Even though interrupts have been re-enabled, this 4307 /* Even though interrupts have been re-enabled, this
4323 * access is safe because interrupts can only add new 4308 * access is safe because interrupts can only add new
4324 * entries to the tail of this list, and only ->poll() 4309 * entries to the tail of this list, and only ->poll()
4325 * calls can remove this head entry from the list. 4310 * calls can remove this head entry from the list.
4326 */ 4311 */
4327 n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list); 4312 n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
4328 4313
4329 have = netpoll_poll_lock(n); 4314 have = netpoll_poll_lock(n);
4330 4315
4331 weight = n->weight; 4316 weight = n->weight;
4332 4317
4333 /* This NAPI_STATE_SCHED test is for avoiding a race 4318 /* This NAPI_STATE_SCHED test is for avoiding a race
4334 * with netpoll's poll_napi(). Only the entity which 4319 * with netpoll's poll_napi(). Only the entity which
4335 * obtains the lock and sees NAPI_STATE_SCHED set will 4320 * obtains the lock and sees NAPI_STATE_SCHED set will
4336 * actually make the ->poll() call. Therefore we avoid 4321 * actually make the ->poll() call. Therefore we avoid
4337 * accidentally calling ->poll() when NAPI is not scheduled. 4322 * accidentally calling ->poll() when NAPI is not scheduled.
4338 */ 4323 */
4339 work = 0; 4324 work = 0;
4340 if (test_bit(NAPI_STATE_SCHED, &n->state)) { 4325 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
4341 work = n->poll(n, weight); 4326 work = n->poll(n, weight);
4342 trace_napi_poll(n); 4327 trace_napi_poll(n);
4343 } 4328 }
4344 4329
4345 WARN_ON_ONCE(work > weight); 4330 WARN_ON_ONCE(work > weight);
4346 4331
4347 budget -= work; 4332 budget -= work;
4348 4333
4349 local_irq_disable(); 4334 local_irq_disable();
4350 4335
4351 /* Drivers must not modify the NAPI state if they 4336 /* Drivers must not modify the NAPI state if they
4352 * consume the entire weight. In such cases this code 4337 * consume the entire weight. In such cases this code
4353 * still "owns" the NAPI instance and therefore can 4338 * still "owns" the NAPI instance and therefore can
4354 * move the instance around on the list at-will. 4339 * move the instance around on the list at-will.
4355 */ 4340 */
4356 if (unlikely(work == weight)) { 4341 if (unlikely(work == weight)) {
4357 if (unlikely(napi_disable_pending(n))) { 4342 if (unlikely(napi_disable_pending(n))) {
4358 local_irq_enable(); 4343 local_irq_enable();
4359 napi_complete(n); 4344 napi_complete(n);
4360 local_irq_disable(); 4345 local_irq_disable();
4361 } else { 4346 } else {
4362 if (n->gro_list) { 4347 if (n->gro_list) {
4363 /* flush too old packets 4348 /* flush too old packets
4364 * If HZ < 1000, flush all packets. 4349 * If HZ < 1000, flush all packets.
4365 */ 4350 */
4366 local_irq_enable(); 4351 local_irq_enable();
4367 napi_gro_flush(n, HZ >= 1000); 4352 napi_gro_flush(n, HZ >= 1000);
4368 local_irq_disable(); 4353 local_irq_disable();
4369 } 4354 }
4370 list_move_tail(&n->poll_list, &sd->poll_list); 4355 list_move_tail(&n->poll_list, &sd->poll_list);
4371 } 4356 }
4372 } 4357 }
4373 4358
4374 netpoll_poll_unlock(have); 4359 netpoll_poll_unlock(have);
4375 } 4360 }
4376 out: 4361 out:
4377 net_rps_action_and_irq_enable(sd); 4362 net_rps_action_and_irq_enable(sd);
4378 4363
4379 #ifdef CONFIG_NET_DMA 4364 #ifdef CONFIG_NET_DMA
4380 /* 4365 /*
4381 * There may not be any more sk_buffs coming right now, so push 4366 * There may not be any more sk_buffs coming right now, so push
4382 * any pending DMA copies to hardware 4367 * any pending DMA copies to hardware
4383 */ 4368 */
4384 dma_issue_pending_all(); 4369 dma_issue_pending_all();
4385 #endif 4370 #endif
4386 4371
4387 return; 4372 return;
4388 4373
4389 softnet_break: 4374 softnet_break:
4390 sd->time_squeeze++; 4375 sd->time_squeeze++;
4391 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 4376 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
4392 goto out; 4377 goto out;
4393 } 4378 }
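
net_rx_action() bounds its work in two independent ways: a global packet budget (netdev_budget) shared by every NAPI instance on the CPU, and a wall-clock limit of two jiffies, whichever trips first; a poller that uses its full weight is rotated to the tail and revisited, while one that finishes early completes and drops off the list. A userspace sketch of that budgeted round-robin structure follows; the pollers, weights and backlog numbers are invented for the example.

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define NR_POLLERS 3

static int backlog[NR_POLLERS] = { 9, 2, 5 };	/* pretend pending work per poller */

/* Do up to 'weight' units of work for poller 'idx'; return how much was done. */
static int poll_one(int idx, int weight)
{
	int done = backlog[idx] < weight ? backlog[idx] : weight;

	backlog[idx] -= done;
	printf("poller %d did %d units, %d left\n", idx, done, backlog[idx]);
	return done;
}

int main(void)
{
	int budget = 10;			/* cf. netdev_budget */
	time_t deadline = time(NULL) + 2;	/* cf. the two-jiffy limit */
	int weight = 4, idx = 0, nr_active = NR_POLLERS;
	bool active[NR_POLLERS] = { true, true, true };

	while (nr_active && budget > 0 && time(NULL) <= deadline) {
		int work;

		if (!active[idx]) {			/* skip pollers that already completed */
			idx = (idx + 1) % NR_POLLERS;
			continue;
		}
		work = poll_one(idx, weight);
		budget -= work;
		if (work < weight) {			/* drained: the real code calls napi_complete() */
			active[idx] = false;
			nr_active--;
		}
		idx = (idx + 1) % NR_POLLERS;		/* a busy poller goes to the back of the line */
	}
	return 0;
}
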
4394 4379
4395 struct netdev_adjacent { 4380 struct netdev_adjacent {
4396 struct net_device *dev; 4381 struct net_device *dev;
4397 4382
4398 /* upper master flag; there can only be one master device per list */ 4383 /* upper master flag; there can only be one master device per list */
4399 bool master; 4384 bool master;
4400 4385
4401 /* counter for the number of times this device was added to us */ 4386 /* counter for the number of times this device was added to us */
4402 u16 ref_nr; 4387 u16 ref_nr;
4403 4388
4404 /* private field for the users */ 4389 /* private field for the users */
4405 void *private; 4390 void *private;
4406 4391
4407 struct list_head list; 4392 struct list_head list;
4408 struct rcu_head rcu; 4393 struct rcu_head rcu;
4409 }; 4394 };
4410 4395
4411 static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev, 4396 static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev,
4412 struct net_device *adj_dev, 4397 struct net_device *adj_dev,
4413 struct list_head *adj_list) 4398 struct list_head *adj_list)
4414 { 4399 {
4415 struct netdev_adjacent *adj; 4400 struct netdev_adjacent *adj;
4416 4401
4417 list_for_each_entry_rcu(adj, adj_list, list) { 4402 list_for_each_entry_rcu(adj, adj_list, list) {
4418 if (adj->dev == adj_dev) 4403 if (adj->dev == adj_dev)
4419 return adj; 4404 return adj;
4420 } 4405 }
4421 return NULL; 4406 return NULL;
4422 } 4407 }
4423 4408
4424 static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, 4409 static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
4425 struct net_device *adj_dev, 4410 struct net_device *adj_dev,
4426 struct list_head *adj_list) 4411 struct list_head *adj_list)
4427 { 4412 {
4428 struct netdev_adjacent *adj; 4413 struct netdev_adjacent *adj;
4429 4414
4430 list_for_each_entry(adj, adj_list, list) { 4415 list_for_each_entry(adj, adj_list, list) {
4431 if (adj->dev == adj_dev) 4416 if (adj->dev == adj_dev)
4432 return adj; 4417 return adj;
4433 } 4418 }
4434 return NULL; 4419 return NULL;
4435 } 4420 }
4436 4421
4437 /** 4422 /**
4438 * netdev_has_upper_dev - Check if device is linked to an upper device 4423 * netdev_has_upper_dev - Check if device is linked to an upper device
4439 * @dev: device 4424 * @dev: device
4440 * @upper_dev: upper device to check 4425 * @upper_dev: upper device to check
4441 * 4426 *
4442 * Find out if a device is linked to the specified upper device and return true 4427 * Find out if a device is linked to the specified upper device and return true
4443 * in case it is. Note that this checks only the immediate upper device, 4428 * in case it is. Note that this checks only the immediate upper device,
4444 * not the complete stack of devices. The caller must hold the RTNL lock. 4429 * not the complete stack of devices. The caller must hold the RTNL lock.
4445 */ 4430 */
4446 bool netdev_has_upper_dev(struct net_device *dev, 4431 bool netdev_has_upper_dev(struct net_device *dev,
4447 struct net_device *upper_dev) 4432 struct net_device *upper_dev)
4448 { 4433 {
4449 ASSERT_RTNL(); 4434 ASSERT_RTNL();
4450 4435
4451 return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper); 4436 return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper);
4452 } 4437 }
4453 EXPORT_SYMBOL(netdev_has_upper_dev); 4438 EXPORT_SYMBOL(netdev_has_upper_dev);
4454 4439
4455 /** 4440 /**
4456 * netdev_has_any_upper_dev - Check if device is linked to some device 4441 * netdev_has_any_upper_dev - Check if device is linked to some device
4457 * @dev: device 4442 * @dev: device
4458 * 4443 *
4459 * Find out if a device is linked to an upper device and return true in case 4444 * Find out if a device is linked to an upper device and return true in case
4460 * it is. The caller must hold the RTNL lock. 4445 * it is. The caller must hold the RTNL lock.
4461 */ 4446 */
4462 bool netdev_has_any_upper_dev(struct net_device *dev) 4447 bool netdev_has_any_upper_dev(struct net_device *dev)
4463 { 4448 {
4464 ASSERT_RTNL(); 4449 ASSERT_RTNL();
4465 4450
4466 return !list_empty(&dev->all_adj_list.upper); 4451 return !list_empty(&dev->all_adj_list.upper);
4467 } 4452 }
4468 EXPORT_SYMBOL(netdev_has_any_upper_dev); 4453 EXPORT_SYMBOL(netdev_has_any_upper_dev);
4469 4454
4470 /** 4455 /**
4471 * netdev_master_upper_dev_get - Get master upper device 4456 * netdev_master_upper_dev_get - Get master upper device
4472 * @dev: device 4457 * @dev: device
4473 * 4458 *
4474 * Find a master upper device and return a pointer to it, or NULL in case 4459 * Find a master upper device and return a pointer to it, or NULL in case
4475 * it's not there. The caller must hold the RTNL lock. 4460 * it's not there. The caller must hold the RTNL lock.
4476 */ 4461 */
4477 struct net_device *netdev_master_upper_dev_get(struct net_device *dev) 4462 struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
4478 { 4463 {
4479 struct netdev_adjacent *upper; 4464 struct netdev_adjacent *upper;
4480 4465
4481 ASSERT_RTNL(); 4466 ASSERT_RTNL();
4482 4467
4483 if (list_empty(&dev->adj_list.upper)) 4468 if (list_empty(&dev->adj_list.upper))
4484 return NULL; 4469 return NULL;
4485 4470
4486 upper = list_first_entry(&dev->adj_list.upper, 4471 upper = list_first_entry(&dev->adj_list.upper,
4487 struct netdev_adjacent, list); 4472 struct netdev_adjacent, list);
4488 if (likely(upper->master)) 4473 if (likely(upper->master))
4489 return upper->dev; 4474 return upper->dev;
4490 return NULL; 4475 return NULL;
4491 } 4476 }
4492 EXPORT_SYMBOL(netdev_master_upper_dev_get); 4477 EXPORT_SYMBOL(netdev_master_upper_dev_get);
4493 4478
4494 void *netdev_adjacent_get_private(struct list_head *adj_list) 4479 void *netdev_adjacent_get_private(struct list_head *adj_list)
4495 { 4480 {
4496 struct netdev_adjacent *adj; 4481 struct netdev_adjacent *adj;
4497 4482
4498 adj = list_entry(adj_list, struct netdev_adjacent, list); 4483 adj = list_entry(adj_list, struct netdev_adjacent, list);
4499 4484
4500 return adj->private; 4485 return adj->private;
4501 } 4486 }
4502 EXPORT_SYMBOL(netdev_adjacent_get_private); 4487 EXPORT_SYMBOL(netdev_adjacent_get_private);
4503 4488
4504 /** 4489 /**
4505 * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list 4490 * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
4506 * @dev: device 4491 * @dev: device
4507 * @iter: list_head ** of the current position 4492 * @iter: list_head ** of the current position
4508 * 4493 *
4509 * Gets the next device from the dev's upper list, starting from iter 4494 * Gets the next device from the dev's upper list, starting from iter
4510 * position. The caller must hold RCU read lock. 4495 * position. The caller must hold RCU read lock.
4511 */ 4496 */
4512 struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, 4497 struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
4513 struct list_head **iter) 4498 struct list_head **iter)
4514 { 4499 {
4515 struct netdev_adjacent *upper; 4500 struct netdev_adjacent *upper;
4516 4501
4517 WARN_ON_ONCE(!rcu_read_lock_held()); 4502 WARN_ON_ONCE(!rcu_read_lock_held());
4518 4503
4519 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); 4504 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
4520 4505
4521 if (&upper->list == &dev->all_adj_list.upper) 4506 if (&upper->list == &dev->all_adj_list.upper)
4522 return NULL; 4507 return NULL;
4523 4508
4524 *iter = &upper->list; 4509 *iter = &upper->list;
4525 4510
4526 return upper->dev; 4511 return upper->dev;
4527 } 4512 }
4528 EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu); 4513 EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
4529 4514
4530 /** 4515 /**
4531 * netdev_lower_get_next_private - Get the next ->private from the 4516 * netdev_lower_get_next_private - Get the next ->private from the
4532 * lower neighbour list 4517 * lower neighbour list
4533 * @dev: device 4518 * @dev: device
4534 * @iter: list_head ** of the current position 4519 * @iter: list_head ** of the current position
4535 * 4520 *
4536 * Gets the next netdev_adjacent->private from the dev's lower neighbour 4521 * Gets the next netdev_adjacent->private from the dev's lower neighbour
4537 * list, starting from the iter position. The caller must either hold the 4522 * list, starting from the iter position. The caller must either hold the
4538 * RTNL lock or its own locking that guarantees that the neighbour lower 4523 * RTNL lock or its own locking that guarantees that the neighbour lower
4539 * list will remain unchanged. 4524 * list will remain unchanged.
4540 */ 4525 */
4541 void *netdev_lower_get_next_private(struct net_device *dev, 4526 void *netdev_lower_get_next_private(struct net_device *dev,
4542 struct list_head **iter) 4527 struct list_head **iter)
4543 { 4528 {
4544 struct netdev_adjacent *lower; 4529 struct netdev_adjacent *lower;
4545 4530
4546 lower = list_entry(*iter, struct netdev_adjacent, list); 4531 lower = list_entry(*iter, struct netdev_adjacent, list);
4547 4532
4548 if (&lower->list == &dev->adj_list.lower) 4533 if (&lower->list == &dev->adj_list.lower)
4549 return NULL; 4534 return NULL;
4550 4535
4551 if (iter) 4536 if (iter)
4552 *iter = lower->list.next; 4537 *iter = lower->list.next;
4553 4538
4554 return lower->private; 4539 return lower->private;
4555 } 4540 }
4556 EXPORT_SYMBOL(netdev_lower_get_next_private); 4541 EXPORT_SYMBOL(netdev_lower_get_next_private);
4557 4542
4558 /** 4543 /**
4559 * netdev_lower_get_next_private_rcu - Get the next ->private from the 4544 * netdev_lower_get_next_private_rcu - Get the next ->private from the
4560 * lower neighbour list, RCU 4545 * lower neighbour list, RCU
4561 * variant 4546 * variant
4562 * @dev: device 4547 * @dev: device
4563 * @iter: list_head ** of the current position 4548 * @iter: list_head ** of the current position
4564 * 4549 *
4565 * Gets the next netdev_adjacent->private from the dev's lower neighbour 4550 * Gets the next netdev_adjacent->private from the dev's lower neighbour
4566 * list, starting from iter position. The caller must hold RCU read lock. 4551 * list, starting from iter position. The caller must hold RCU read lock.
4567 */ 4552 */
4568 void *netdev_lower_get_next_private_rcu(struct net_device *dev, 4553 void *netdev_lower_get_next_private_rcu(struct net_device *dev,
4569 struct list_head **iter) 4554 struct list_head **iter)
4570 { 4555 {
4571 struct netdev_adjacent *lower; 4556 struct netdev_adjacent *lower;
4572 4557
4573 WARN_ON_ONCE(!rcu_read_lock_held()); 4558 WARN_ON_ONCE(!rcu_read_lock_held());
4574 4559
4575 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); 4560 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
4576 4561
4577 if (&lower->list == &dev->adj_list.lower) 4562 if (&lower->list == &dev->adj_list.lower)
4578 return NULL; 4563 return NULL;
4579 4564
4580 if (iter) 4565 if (iter)
4581 *iter = &lower->list; 4566 *iter = &lower->list;
4582 4567
4583 return lower->private; 4568 return lower->private;
4584 } 4569 }
4585 EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); 4570 EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
4586 4571
4587 /** 4572 /**
4588 * netdev_master_upper_dev_get_rcu - Get master upper device 4573 * netdev_master_upper_dev_get_rcu - Get master upper device
4589 * @dev: device 4574 * @dev: device
4590 * 4575 *
4591 * Find a master upper device and return a pointer to it, or NULL in case 4576 * Find a master upper device and return a pointer to it, or NULL in case
4592 * it's not there. The caller must hold the RCU read lock. 4577 * it's not there. The caller must hold the RCU read lock.
4593 */ 4578 */
4594 struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) 4579 struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
4595 { 4580 {
4596 struct netdev_adjacent *upper; 4581 struct netdev_adjacent *upper;
4597 4582
4598 upper = list_first_or_null_rcu(&dev->adj_list.upper, 4583 upper = list_first_or_null_rcu(&dev->adj_list.upper,
4599 struct netdev_adjacent, list); 4584 struct netdev_adjacent, list);
4600 if (upper && likely(upper->master)) 4585 if (upper && likely(upper->master))
4601 return upper->dev; 4586 return upper->dev;
4602 return NULL; 4587 return NULL;
4603 } 4588 }
4604 EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); 4589 EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
4605 4590
4606 static int __netdev_adjacent_dev_insert(struct net_device *dev, 4591 static int __netdev_adjacent_dev_insert(struct net_device *dev,
4607 struct net_device *adj_dev, 4592 struct net_device *adj_dev,
4608 struct list_head *dev_list, 4593 struct list_head *dev_list,
4609 void *private, bool master) 4594 void *private, bool master)
4610 { 4595 {
4611 struct netdev_adjacent *adj; 4596 struct netdev_adjacent *adj;
4612 char linkname[IFNAMSIZ+7]; 4597 char linkname[IFNAMSIZ+7];
4613 int ret; 4598 int ret;
4614 4599
4615 adj = __netdev_find_adj(dev, adj_dev, dev_list); 4600 adj = __netdev_find_adj(dev, adj_dev, dev_list);
4616 4601
4617 if (adj) { 4602 if (adj) {
4618 adj->ref_nr++; 4603 adj->ref_nr++;
4619 return 0; 4604 return 0;
4620 } 4605 }
4621 4606
4622 adj = kmalloc(sizeof(*adj), GFP_KERNEL); 4607 adj = kmalloc(sizeof(*adj), GFP_KERNEL);
4623 if (!adj) 4608 if (!adj)
4624 return -ENOMEM; 4609 return -ENOMEM;
4625 4610
4626 adj->dev = adj_dev; 4611 adj->dev = adj_dev;
4627 adj->master = master; 4612 adj->master = master;
4628 adj->ref_nr = 1; 4613 adj->ref_nr = 1;
4629 adj->private = private; 4614 adj->private = private;
4630 dev_hold(adj_dev); 4615 dev_hold(adj_dev);
4631 4616
4632 pr_debug("dev_hold for %s, because of link added from %s to %s\n", 4617 pr_debug("dev_hold for %s, because of link added from %s to %s\n",
4633 adj_dev->name, dev->name, adj_dev->name); 4618 adj_dev->name, dev->name, adj_dev->name);
4634 4619
4635 if (dev_list == &dev->adj_list.lower) { 4620 if (dev_list == &dev->adj_list.lower) {
4636 sprintf(linkname, "lower_%s", adj_dev->name); 4621 sprintf(linkname, "lower_%s", adj_dev->name);
4637 ret = sysfs_create_link(&(dev->dev.kobj), 4622 ret = sysfs_create_link(&(dev->dev.kobj),
4638 &(adj_dev->dev.kobj), linkname); 4623 &(adj_dev->dev.kobj), linkname);
4639 if (ret) 4624 if (ret)
4640 goto free_adj; 4625 goto free_adj;
4641 } else if (dev_list == &dev->adj_list.upper) { 4626 } else if (dev_list == &dev->adj_list.upper) {
4642 sprintf(linkname, "upper_%s", adj_dev->name); 4627 sprintf(linkname, "upper_%s", adj_dev->name);
4643 ret = sysfs_create_link(&(dev->dev.kobj), 4628 ret = sysfs_create_link(&(dev->dev.kobj),
4644 &(adj_dev->dev.kobj), linkname); 4629 &(adj_dev->dev.kobj), linkname);
4645 if (ret) 4630 if (ret)
4646 goto free_adj; 4631 goto free_adj;
4647 } 4632 }
4648 4633
4649 /* Ensure that master link is always the first item in list. */ 4634 /* Ensure that master link is always the first item in list. */
4650 if (master) { 4635 if (master) {
4651 ret = sysfs_create_link(&(dev->dev.kobj), 4636 ret = sysfs_create_link(&(dev->dev.kobj),
4652 &(adj_dev->dev.kobj), "master"); 4637 &(adj_dev->dev.kobj), "master");
4653 if (ret) 4638 if (ret)
4654 goto remove_symlinks; 4639 goto remove_symlinks;
4655 4640
4656 list_add_rcu(&adj->list, dev_list); 4641 list_add_rcu(&adj->list, dev_list);
4657 } else { 4642 } else {
4658 list_add_tail_rcu(&adj->list, dev_list); 4643 list_add_tail_rcu(&adj->list, dev_list);
4659 } 4644 }
4660 4645
4661 return 0; 4646 return 0;
4662 4647
4663 remove_symlinks: 4648 remove_symlinks:
4664 if (dev_list == &dev->adj_list.lower) { 4649 if (dev_list == &dev->adj_list.lower) {
4665 sprintf(linkname, "lower_%s", adj_dev->name); 4650 sprintf(linkname, "lower_%s", adj_dev->name);
4666 sysfs_remove_link(&(dev->dev.kobj), linkname); 4651 sysfs_remove_link(&(dev->dev.kobj), linkname);
4667 } else if (dev_list == &dev->adj_list.upper) { 4652 } else if (dev_list == &dev->adj_list.upper) {
4668 sprintf(linkname, "upper_%s", adj_dev->name); 4653 sprintf(linkname, "upper_%s", adj_dev->name);
4669 sysfs_remove_link(&(dev->dev.kobj), linkname); 4654 sysfs_remove_link(&(dev->dev.kobj), linkname);
4670 } 4655 }
4671 4656
4672 free_adj: 4657 free_adj:
4673 kfree(adj); 4658 kfree(adj);
4674 dev_put(adj_dev); 4659 dev_put(adj_dev);
4675 4660
4676 return ret; 4661 return ret;
4677 } 4662 }
4678 4663
4679 void __netdev_adjacent_dev_remove(struct net_device *dev, 4664 void __netdev_adjacent_dev_remove(struct net_device *dev,
4680 struct net_device *adj_dev, 4665 struct net_device *adj_dev,
4681 struct list_head *dev_list) 4666 struct list_head *dev_list)
4682 { 4667 {
4683 struct netdev_adjacent *adj; 4668 struct netdev_adjacent *adj;
4684 char linkname[IFNAMSIZ+7]; 4669 char linkname[IFNAMSIZ+7];
4685 4670
4686 adj = __netdev_find_adj(dev, adj_dev, dev_list); 4671 adj = __netdev_find_adj(dev, adj_dev, dev_list);
4687 4672
4688 if (!adj) { 4673 if (!adj) {
4689 pr_err("tried to remove device %s from %s\n", 4674 pr_err("tried to remove device %s from %s\n",
4690 dev->name, adj_dev->name); 4675 dev->name, adj_dev->name);
4691 BUG(); 4676 BUG();
4692 } 4677 }
4693 4678
4694 if (adj->ref_nr > 1) { 4679 if (adj->ref_nr > 1) {
4695 pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name, 4680 pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name,
4696 adj->ref_nr-1); 4681 adj->ref_nr-1);
4697 adj->ref_nr--; 4682 adj->ref_nr--;
4698 return; 4683 return;
4699 } 4684 }
4700 4685
4701 if (adj->master) 4686 if (adj->master)
4702 sysfs_remove_link(&(dev->dev.kobj), "master"); 4687 sysfs_remove_link(&(dev->dev.kobj), "master");
4703 4688
4704 if (dev_list == &dev->adj_list.lower) { 4689 if (dev_list == &dev->adj_list.lower) {
4705 sprintf(linkname, "lower_%s", adj_dev->name); 4690 sprintf(linkname, "lower_%s", adj_dev->name);
4706 sysfs_remove_link(&(dev->dev.kobj), linkname); 4691 sysfs_remove_link(&(dev->dev.kobj), linkname);
4707 } else if (dev_list == &dev->adj_list.upper) { 4692 } else if (dev_list == &dev->adj_list.upper) {
4708 sprintf(linkname, "upper_%s", adj_dev->name); 4693 sprintf(linkname, "upper_%s", adj_dev->name);
4709 sysfs_remove_link(&(dev->dev.kobj), linkname); 4694 sysfs_remove_link(&(dev->dev.kobj), linkname);
4710 } 4695 }
4711 4696
4712 list_del_rcu(&adj->list); 4697 list_del_rcu(&adj->list);
4713 pr_debug("dev_put for %s, because link removed from %s to %s\n", 4698 pr_debug("dev_put for %s, because link removed from %s to %s\n",
4714 adj_dev->name, dev->name, adj_dev->name); 4699 adj_dev->name, dev->name, adj_dev->name);
4715 dev_put(adj_dev); 4700 dev_put(adj_dev);
4716 kfree_rcu(adj, rcu); 4701 kfree_rcu(adj, rcu);
4717 } 4702 }
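
__netdev_adjacent_dev_insert() and __netdev_adjacent_dev_remove() above implement a reference-counted adjacency entry: linking the same pair of devices again only bumps ref_nr, and only the final remove actually unlinks and frees the entry. The same bookkeeping in a freestanding sketch; the list, the string identities and the adj_* helpers are invented for illustration and carry none of the sysfs or RCU details.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct adjacent {
	const char *peer;	/* identity of the linked device */
	int ref_nr;		/* how many times this link was requested */
	struct adjacent *next;
};

static struct adjacent *adj_list;

static struct adjacent *find_adj(const char *peer)
{
	struct adjacent *adj;

	for (adj = adj_list; adj; adj = adj->next)
		if (!strcmp(adj->peer, peer))
			return adj;
	return NULL;
}

/* Link: reuse an existing entry if present, otherwise allocate a new one. */
static int adj_insert(const char *peer)
{
	struct adjacent *adj = find_adj(peer);

	if (adj) {
		adj->ref_nr++;
		return 0;
	}
	adj = malloc(sizeof(*adj));
	if (!adj)
		return -1;
	adj->peer = peer;
	adj->ref_nr = 1;
	adj->next = adj_list;
	adj_list = adj;
	return 0;
}

/* Unlink: only drop the entry once the last reference goes away. */
static void adj_remove(const char *peer)
{
	struct adjacent **pp, *adj;

	for (pp = &adj_list; (adj = *pp); pp = &adj->next) {
		if (strcmp(adj->peer, peer))
			continue;
		if (--adj->ref_nr)
			return;		/* still referenced elsewhere */
		*pp = adj->next;
		free(adj);
		return;
	}
}

int main(void)
{
	adj_insert("eth0");
	adj_insert("eth0");		/* second link: ref_nr becomes 2 */
	adj_remove("eth0");		/* entry survives, ref_nr back to 1 */
	printf("%s\n", find_adj("eth0") ? "still linked" : "gone");
	adj_remove("eth0");		/* last reference: entry is freed */
	printf("%s\n", find_adj("eth0") ? "still linked" : "gone");
	return 0;
}
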
4718 4703
4719 int __netdev_adjacent_dev_link_lists(struct net_device *dev, 4704 int __netdev_adjacent_dev_link_lists(struct net_device *dev,
4720 struct net_device *upper_dev, 4705 struct net_device *upper_dev,
4721 struct list_head *up_list, 4706 struct list_head *up_list,
4722 struct list_head *down_list, 4707 struct list_head *down_list,
4723 void *private, bool master) 4708 void *private, bool master)
4724 { 4709 {
4725 int ret; 4710 int ret;
4726 4711
4727 ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private, 4712 ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private,
4728 master); 4713 master);
4729 if (ret) 4714 if (ret)
4730 return ret; 4715 return ret;
4731 4716
4732 ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private, 4717 ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private,
4733 false); 4718 false);
4734 if (ret) { 4719 if (ret) {
4735 __netdev_adjacent_dev_remove(dev, upper_dev, up_list); 4720 __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
4736 return ret; 4721 return ret;
4737 } 4722 }
4738 4723
4739 return 0; 4724 return 0;
4740 } 4725 }
4741 4726
4742 int __netdev_adjacent_dev_link(struct net_device *dev, 4727 int __netdev_adjacent_dev_link(struct net_device *dev,
4743 struct net_device *upper_dev) 4728 struct net_device *upper_dev)
4744 { 4729 {
4745 return __netdev_adjacent_dev_link_lists(dev, upper_dev, 4730 return __netdev_adjacent_dev_link_lists(dev, upper_dev,
4746 &dev->all_adj_list.upper, 4731 &dev->all_adj_list.upper,
4747 &upper_dev->all_adj_list.lower, 4732 &upper_dev->all_adj_list.lower,
4748 NULL, false); 4733 NULL, false);
4749 } 4734 }
4750 4735
4751 void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, 4736 void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
4752 struct net_device *upper_dev, 4737 struct net_device *upper_dev,
4753 struct list_head *up_list, 4738 struct list_head *up_list,
4754 struct list_head *down_list) 4739 struct list_head *down_list)
4755 { 4740 {
4756 __netdev_adjacent_dev_remove(dev, upper_dev, up_list); 4741 __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
4757 __netdev_adjacent_dev_remove(upper_dev, dev, down_list); 4742 __netdev_adjacent_dev_remove(upper_dev, dev, down_list);
4758 } 4743 }
4759 4744
4760 void __netdev_adjacent_dev_unlink(struct net_device *dev, 4745 void __netdev_adjacent_dev_unlink(struct net_device *dev,
4761 struct net_device *upper_dev) 4746 struct net_device *upper_dev)
4762 { 4747 {
4763 __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 4748 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
4764 &dev->all_adj_list.upper, 4749 &dev->all_adj_list.upper,
4765 &upper_dev->all_adj_list.lower); 4750 &upper_dev->all_adj_list.lower);
4766 } 4751 }
4767 4752
4768 int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, 4753 int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
4769 struct net_device *upper_dev, 4754 struct net_device *upper_dev,
4770 void *private, bool master) 4755 void *private, bool master)
4771 { 4756 {
4772 int ret = __netdev_adjacent_dev_link(dev, upper_dev); 4757 int ret = __netdev_adjacent_dev_link(dev, upper_dev);
4773 4758
4774 if (ret) 4759 if (ret)
4775 return ret; 4760 return ret;
4776 4761
4777 ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 4762 ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
4778 &dev->adj_list.upper, 4763 &dev->adj_list.upper,
4779 &upper_dev->adj_list.lower, 4764 &upper_dev->adj_list.lower,
4780 private, master); 4765 private, master);
4781 if (ret) { 4766 if (ret) {
4782 __netdev_adjacent_dev_unlink(dev, upper_dev); 4767 __netdev_adjacent_dev_unlink(dev, upper_dev);
4783 return ret; 4768 return ret;
4784 } 4769 }
4785 4770
4786 return 0; 4771 return 0;
4787 } 4772 }
4788 4773
4789 void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, 4774 void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
4790 struct net_device *upper_dev) 4775 struct net_device *upper_dev)
4791 { 4776 {
4792 __netdev_adjacent_dev_unlink(dev, upper_dev); 4777 __netdev_adjacent_dev_unlink(dev, upper_dev);
4793 __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 4778 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
4794 &dev->adj_list.upper, 4779 &dev->adj_list.upper,
4795 &upper_dev->adj_list.lower); 4780 &upper_dev->adj_list.lower);
4796 } 4781 }
4797 4782
4798 static int __netdev_upper_dev_link(struct net_device *dev, 4783 static int __netdev_upper_dev_link(struct net_device *dev,
4799 struct net_device *upper_dev, bool master, 4784 struct net_device *upper_dev, bool master,
4800 void *private) 4785 void *private)
4801 { 4786 {
4802 struct netdev_adjacent *i, *j, *to_i, *to_j; 4787 struct netdev_adjacent *i, *j, *to_i, *to_j;
4803 int ret = 0; 4788 int ret = 0;
4804 4789
4805 ASSERT_RTNL(); 4790 ASSERT_RTNL();
4806 4791
4807 if (dev == upper_dev) 4792 if (dev == upper_dev)
4808 return -EBUSY; 4793 return -EBUSY;
4809 4794
4810 /* To prevent loops, check if dev is not upper device to upper_dev. */ 4795 /* To prevent loops, check if dev is not upper device to upper_dev. */
4811 if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper)) 4796 if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
4812 return -EBUSY; 4797 return -EBUSY;
4813 4798
4814 if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper)) 4799 if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper))
4815 return -EEXIST; 4800 return -EEXIST;
4816 4801
4817 if (master && netdev_master_upper_dev_get(dev)) 4802 if (master && netdev_master_upper_dev_get(dev))
4818 return -EBUSY; 4803 return -EBUSY;
4819 4804
4820 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private, 4805 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
4821 master); 4806 master);
4822 if (ret) 4807 if (ret)
4823 return ret; 4808 return ret;
4824 4809
4825 /* Now that we linked these devs, make all the upper_dev's 4810 /* Now that we linked these devs, make all the upper_dev's
4826 * all_adj_list.upper visible to every dev's all_adj_list.lower and 4811 * all_adj_list.upper visible to every dev's all_adj_list.lower and
4827 * vice versa, and don't forget the device itself. All of these 4812 * vice versa, and don't forget the device itself. All of these
4828 * links are non-neighbours. 4813 * links are non-neighbours.
4829 */ 4814 */
4830 list_for_each_entry(i, &dev->all_adj_list.lower, list) { 4815 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
4831 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { 4816 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
4832 pr_debug("Interlinking %s with %s, non-neighbour\n", 4817 pr_debug("Interlinking %s with %s, non-neighbour\n",
4833 i->dev->name, j->dev->name); 4818 i->dev->name, j->dev->name);
4834 ret = __netdev_adjacent_dev_link(i->dev, j->dev); 4819 ret = __netdev_adjacent_dev_link(i->dev, j->dev);
4835 if (ret) 4820 if (ret)
4836 goto rollback_mesh; 4821 goto rollback_mesh;
4837 } 4822 }
4838 } 4823 }
4839 4824
4840 /* add dev to every upper_dev's upper device */ 4825 /* add dev to every upper_dev's upper device */
4841 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { 4826 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
4842 pr_debug("linking %s's upper device %s with %s\n", 4827 pr_debug("linking %s's upper device %s with %s\n",
4843 upper_dev->name, i->dev->name, dev->name); 4828 upper_dev->name, i->dev->name, dev->name);
4844 ret = __netdev_adjacent_dev_link(dev, i->dev); 4829 ret = __netdev_adjacent_dev_link(dev, i->dev);
4845 if (ret) 4830 if (ret)
4846 goto rollback_upper_mesh; 4831 goto rollback_upper_mesh;
4847 } 4832 }
4848 4833
4849 /* add upper_dev to every dev's lower device */ 4834 /* add upper_dev to every dev's lower device */
4850 list_for_each_entry(i, &dev->all_adj_list.lower, list) { 4835 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
4851 pr_debug("linking %s's lower device %s with %s\n", dev->name, 4836 pr_debug("linking %s's lower device %s with %s\n", dev->name,
4852 i->dev->name, upper_dev->name); 4837 i->dev->name, upper_dev->name);
4853 ret = __netdev_adjacent_dev_link(i->dev, upper_dev); 4838 ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
4854 if (ret) 4839 if (ret)
4855 goto rollback_lower_mesh; 4840 goto rollback_lower_mesh;
4856 } 4841 }
4857 4842
4858 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); 4843 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
4859 return 0; 4844 return 0;
4860 4845
4861 rollback_lower_mesh: 4846 rollback_lower_mesh:
4862 to_i = i; 4847 to_i = i;
4863 list_for_each_entry(i, &dev->all_adj_list.lower, list) { 4848 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
4864 if (i == to_i) 4849 if (i == to_i)
4865 break; 4850 break;
4866 __netdev_adjacent_dev_unlink(i->dev, upper_dev); 4851 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
4867 } 4852 }
4868 4853
4869 i = NULL; 4854 i = NULL;
4870 4855
4871 rollback_upper_mesh: 4856 rollback_upper_mesh:
4872 to_i = i; 4857 to_i = i;
4873 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { 4858 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
4874 if (i == to_i) 4859 if (i == to_i)
4875 break; 4860 break;
4876 __netdev_adjacent_dev_unlink(dev, i->dev); 4861 __netdev_adjacent_dev_unlink(dev, i->dev);
4877 } 4862 }
4878 4863
4879 i = j = NULL; 4864 i = j = NULL;
4880 4865
4881 rollback_mesh: 4866 rollback_mesh:
4882 to_i = i; 4867 to_i = i;
4883 to_j = j; 4868 to_j = j;
4884 list_for_each_entry(i, &dev->all_adj_list.lower, list) { 4869 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
4885 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { 4870 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
4886 if (i == to_i && j == to_j) 4871 if (i == to_i && j == to_j)
4887 break; 4872 break;
4888 __netdev_adjacent_dev_unlink(i->dev, j->dev); 4873 __netdev_adjacent_dev_unlink(i->dev, j->dev);
4889 } 4874 }
4890 if (i == to_i) 4875 if (i == to_i)
4891 break; 4876 break;
4892 } 4877 }
4893 4878
4894 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); 4879 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
4895 4880
4896 return ret; 4881 return ret;
4897 } 4882 }
4898 4883
4899 /** 4884 /**
4900 * netdev_upper_dev_link - Add a link to the upper device 4885 * netdev_upper_dev_link - Add a link to the upper device
4901 * @dev: device 4886 * @dev: device
4902 * @upper_dev: new upper device 4887 * @upper_dev: new upper device
4903 * 4888 *
4904 * Adds a link to device which is upper to this one. The caller must hold 4889 * Adds a link to device which is upper to this one. The caller must hold
4905 * the RTNL lock. On a failure a negative errno code is returned. 4890 * the RTNL lock. On a failure a negative errno code is returned.
4906 * On success the reference counts are adjusted and the function 4891 * On success the reference counts are adjusted and the function
4907 * returns zero. 4892 * returns zero.
4908 */ 4893 */
4909 int netdev_upper_dev_link(struct net_device *dev, 4894 int netdev_upper_dev_link(struct net_device *dev,
4910 struct net_device *upper_dev) 4895 struct net_device *upper_dev)
4911 { 4896 {
4912 return __netdev_upper_dev_link(dev, upper_dev, false, NULL); 4897 return __netdev_upper_dev_link(dev, upper_dev, false, NULL);
4913 } 4898 }
4914 EXPORT_SYMBOL(netdev_upper_dev_link); 4899 EXPORT_SYMBOL(netdev_upper_dev_link);
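For context outside the diff, a minimal sketch of a caller: a hypothetical stacked driver linking its lower device to the upper device it just created. The my_*() name is illustrative; only netdev_upper_dev_link() and the RTNL requirement come from the code above (the usual <linux/netdevice.h> and <linux/rtnetlink.h> includes are assumed).

static int my_stack_on(struct net_device *lower, struct net_device *upper)
{
	int err;

	ASSERT_RTNL();				/* caller must hold the RTNL lock */
	err = netdev_upper_dev_link(lower, upper);
	if (err)				/* -EBUSY, -EEXIST or -ENOMEM */
		return err;

	netdev_info(upper, "now stacked on %s\n", lower->name);
	return 0;
}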
4915 4900
4916 /** 4901 /**
4917 * netdev_master_upper_dev_link - Add a master link to the upper device 4902 * netdev_master_upper_dev_link - Add a master link to the upper device
4918 * @dev: device 4903 * @dev: device
4919 * @upper_dev: new upper device 4904 * @upper_dev: new upper device
4920 * 4905 *
4921 * Adds a link to device which is upper to this one. In this case, only 4906 * Adds a link to device which is upper to this one. In this case, only
4922 * one master upper device can be linked, although other non-master devices 4907 * one master upper device can be linked, although other non-master devices
4923 * might be linked as well. The caller must hold the RTNL lock. 4908 * might be linked as well. The caller must hold the RTNL lock.
4924 * On a failure a negative errno code is returned. On success the reference 4909 * On a failure a negative errno code is returned. On success the reference
4925 * counts are adjusted and the function returns zero. 4910 * counts are adjusted and the function returns zero.
4926 */ 4911 */
4927 int netdev_master_upper_dev_link(struct net_device *dev, 4912 int netdev_master_upper_dev_link(struct net_device *dev,
4928 struct net_device *upper_dev) 4913 struct net_device *upper_dev)
4929 { 4914 {
4930 return __netdev_upper_dev_link(dev, upper_dev, true, NULL); 4915 return __netdev_upper_dev_link(dev, upper_dev, true, NULL);
4931 } 4916 }
4932 EXPORT_SYMBOL(netdev_master_upper_dev_link); 4917 EXPORT_SYMBOL(netdev_master_upper_dev_link);
4933 4918
4934 int netdev_master_upper_dev_link_private(struct net_device *dev, 4919 int netdev_master_upper_dev_link_private(struct net_device *dev,
4935 struct net_device *upper_dev, 4920 struct net_device *upper_dev,
4936 void *private) 4921 void *private)
4937 { 4922 {
4938 return __netdev_upper_dev_link(dev, upper_dev, true, private); 4923 return __netdev_upper_dev_link(dev, upper_dev, true, private);
4939 } 4924 }
4940 EXPORT_SYMBOL(netdev_master_upper_dev_link_private); 4925 EXPORT_SYMBOL(netdev_master_upper_dev_link_private);
4941 4926
4942 /** 4927 /**
4943 * netdev_upper_dev_unlink - Removes a link to upper device 4928 * netdev_upper_dev_unlink - Removes a link to upper device
4944 * @dev: device 4929 * @dev: device
4945 * @upper_dev: upper device to unlink 4930 * @upper_dev: upper device to unlink
4946 * 4931 *
4947 * Removes a link to device which is upper to this one. The caller must hold 4932 * Removes a link to device which is upper to this one. The caller must hold
4948 * the RTNL lock. 4933 * the RTNL lock.
4949 */ 4934 */
4950 void netdev_upper_dev_unlink(struct net_device *dev, 4935 void netdev_upper_dev_unlink(struct net_device *dev,
4951 struct net_device *upper_dev) 4936 struct net_device *upper_dev)
4952 { 4937 {
4953 struct netdev_adjacent *i, *j; 4938 struct netdev_adjacent *i, *j;
4954 ASSERT_RTNL(); 4939 ASSERT_RTNL();
4955 4940
4956 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); 4941 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
4957 4942
4958 /* Here is the tricky part. We must remove all dev's lower 4943 /* Here is the tricky part. We must remove all dev's lower
4959 * devices from all upper_dev's upper devices and vice 4944 * devices from all upper_dev's upper devices and vice
4960 * versa, to maintain the graph relationship. 4945 * versa, to maintain the graph relationship.
4961 */ 4946 */
4962 list_for_each_entry(i, &dev->all_adj_list.lower, list) 4947 list_for_each_entry(i, &dev->all_adj_list.lower, list)
4963 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) 4948 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
4964 __netdev_adjacent_dev_unlink(i->dev, j->dev); 4949 __netdev_adjacent_dev_unlink(i->dev, j->dev);
4965 4950
4966 /* also remove the device itself from the lower/upper device 4951 /* also remove the device itself from the lower/upper device
4967 * list 4952 * list
4968 */ 4953 */
4969 list_for_each_entry(i, &dev->all_adj_list.lower, list) 4954 list_for_each_entry(i, &dev->all_adj_list.lower, list)
4970 __netdev_adjacent_dev_unlink(i->dev, upper_dev); 4955 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
4971 4956
4972 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) 4957 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
4973 __netdev_adjacent_dev_unlink(dev, i->dev); 4958 __netdev_adjacent_dev_unlink(dev, i->dev);
4974 4959
4975 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); 4960 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
4976 } 4961 }
4977 EXPORT_SYMBOL(netdev_upper_dev_unlink); 4962 EXPORT_SYMBOL(netdev_upper_dev_unlink);
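And the matching teardown for the sketch above, again hypothetical: netdev_upper_dev_unlink() must also run under RTNL and fires the same NETDEV_CHANGEUPPER notification.

static void my_unstack(struct net_device *lower, struct net_device *upper)
{
	ASSERT_RTNL();
	netdev_upper_dev_unlink(lower, upper);
}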
4978 4963
4979 void *netdev_lower_dev_get_private_rcu(struct net_device *dev, 4964 void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
4980 struct net_device *lower_dev) 4965 struct net_device *lower_dev)
4981 { 4966 {
4982 struct netdev_adjacent *lower; 4967 struct netdev_adjacent *lower;
4983 4968
4984 if (!lower_dev) 4969 if (!lower_dev)
4985 return NULL; 4970 return NULL;
4986 lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower); 4971 lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower);
4987 if (!lower) 4972 if (!lower)
4988 return NULL; 4973 return NULL;
4989 4974
4990 return lower->private; 4975 return lower->private;
4991 } 4976 }
4992 EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu); 4977 EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu);
4993 4978
4994 void *netdev_lower_dev_get_private(struct net_device *dev, 4979 void *netdev_lower_dev_get_private(struct net_device *dev,
4995 struct net_device *lower_dev) 4980 struct net_device *lower_dev)
4996 { 4981 {
4997 struct netdev_adjacent *lower; 4982 struct netdev_adjacent *lower;
4998 4983
4999 if (!lower_dev) 4984 if (!lower_dev)
5000 return NULL; 4985 return NULL;
5001 lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower); 4986 lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower);
5002 if (!lower) 4987 if (!lower)
5003 return NULL; 4988 return NULL;
5004 4989
5005 return lower->private; 4990 return lower->private;
5006 } 4991 }
5007 EXPORT_SYMBOL(netdev_lower_dev_get_private); 4992 EXPORT_SYMBOL(netdev_lower_dev_get_private);
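A sketch of how the private pointer is meant to be used: a hypothetical master driver attaches per-slave state when it links a slave and looks it up later. struct my_slave_priv and the my_*() helpers are made up; netdev_master_upper_dev_link_private() and netdev_lower_dev_get_private() are the functions above, both expected to be called with RTNL held.

struct my_slave_priv {
	u32 port_id;			/* whatever the master tracks per slave */
};

static int my_master_add_slave(struct net_device *master,
			       struct net_device *slave,
			       struct my_slave_priv *priv)
{
	/* stores 'priv' in the adjacency entry created for this pair */
	return netdev_master_upper_dev_link_private(slave, master, priv);
}

static u32 my_master_slave_port(struct net_device *master,
				struct net_device *slave)
{
	struct my_slave_priv *priv;

	priv = netdev_lower_dev_get_private(master, slave);
	return priv ? priv->port_id : 0;
}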
5008 4993
5009 static void dev_change_rx_flags(struct net_device *dev, int flags) 4994 static void dev_change_rx_flags(struct net_device *dev, int flags)
5010 { 4995 {
5011 const struct net_device_ops *ops = dev->netdev_ops; 4996 const struct net_device_ops *ops = dev->netdev_ops;
5012 4997
5013 if (ops->ndo_change_rx_flags) 4998 if (ops->ndo_change_rx_flags)
5014 ops->ndo_change_rx_flags(dev, flags); 4999 ops->ndo_change_rx_flags(dev, flags);
5015 } 5000 }
5016 5001
5017 static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) 5002 static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
5018 { 5003 {
5019 unsigned int old_flags = dev->flags; 5004 unsigned int old_flags = dev->flags;
5020 kuid_t uid; 5005 kuid_t uid;
5021 kgid_t gid; 5006 kgid_t gid;
5022 5007
5023 ASSERT_RTNL(); 5008 ASSERT_RTNL();
5024 5009
5025 dev->flags |= IFF_PROMISC; 5010 dev->flags |= IFF_PROMISC;
5026 dev->promiscuity += inc; 5011 dev->promiscuity += inc;
5027 if (dev->promiscuity == 0) { 5012 if (dev->promiscuity == 0) {
5028 /* 5013 /*
5029 * Avoid overflow. 5014 * Avoid overflow.
5030 * If inc causes overflow, untouch promisc and return error. 5015 * If inc causes overflow, untouch promisc and return error.
5031 */ 5016 */
5032 if (inc < 0) 5017 if (inc < 0)
5033 dev->flags &= ~IFF_PROMISC; 5018 dev->flags &= ~IFF_PROMISC;
5034 else { 5019 else {
5035 dev->promiscuity -= inc; 5020 dev->promiscuity -= inc;
5036 pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n", 5021 pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
5037 dev->name); 5022 dev->name);
5038 return -EOVERFLOW; 5023 return -EOVERFLOW;
5039 } 5024 }
5040 } 5025 }
5041 if (dev->flags != old_flags) { 5026 if (dev->flags != old_flags) {
5042 pr_info("device %s %s promiscuous mode\n", 5027 pr_info("device %s %s promiscuous mode\n",
5043 dev->name, 5028 dev->name,
5044 dev->flags & IFF_PROMISC ? "entered" : "left"); 5029 dev->flags & IFF_PROMISC ? "entered" : "left");
5045 if (audit_enabled) { 5030 if (audit_enabled) {
5046 current_uid_gid(&uid, &gid); 5031 current_uid_gid(&uid, &gid);
5047 audit_log(current->audit_context, GFP_ATOMIC, 5032 audit_log(current->audit_context, GFP_ATOMIC,
5048 AUDIT_ANOM_PROMISCUOUS, 5033 AUDIT_ANOM_PROMISCUOUS,
5049 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", 5034 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
5050 dev->name, (dev->flags & IFF_PROMISC), 5035 dev->name, (dev->flags & IFF_PROMISC),
5051 (old_flags & IFF_PROMISC), 5036 (old_flags & IFF_PROMISC),
5052 from_kuid(&init_user_ns, audit_get_loginuid(current)), 5037 from_kuid(&init_user_ns, audit_get_loginuid(current)),
5053 from_kuid(&init_user_ns, uid), 5038 from_kuid(&init_user_ns, uid),
5054 from_kgid(&init_user_ns, gid), 5039 from_kgid(&init_user_ns, gid),
5055 audit_get_sessionid(current)); 5040 audit_get_sessionid(current));
5056 } 5041 }
5057 5042
5058 dev_change_rx_flags(dev, IFF_PROMISC); 5043 dev_change_rx_flags(dev, IFF_PROMISC);
5059 } 5044 }
5060 if (notify) 5045 if (notify)
5061 __dev_notify_flags(dev, old_flags, IFF_PROMISC); 5046 __dev_notify_flags(dev, old_flags, IFF_PROMISC);
5062 return 0; 5047 return 0;
5063 } 5048 }
5064 5049
5065 /** 5050 /**
5066 * dev_set_promiscuity - update promiscuity count on a device 5051 * dev_set_promiscuity - update promiscuity count on a device
5067 * @dev: device 5052 * @dev: device
5068 * @inc: modifier 5053 * @inc: modifier
5069 * 5054 *
5070 * Add or remove promiscuity from a device. While the count in the device 5055 * Add or remove promiscuity from a device. While the count in the device
5071 * remains above zero the interface remains promiscuous. Once it hits zero 5056 * remains above zero the interface remains promiscuous. Once it hits zero
5072 * the device reverts back to normal filtering operation. A negative inc 5057 * the device reverts back to normal filtering operation. A negative inc
5073 * value is used to drop promiscuity on the device. 5058 * value is used to drop promiscuity on the device.
5074 * Return 0 if successful or a negative errno code on error. 5059 * Return 0 if successful or a negative errno code on error.
5075 */ 5060 */
5076 int dev_set_promiscuity(struct net_device *dev, int inc) 5061 int dev_set_promiscuity(struct net_device *dev, int inc)
5077 { 5062 {
5078 unsigned int old_flags = dev->flags; 5063 unsigned int old_flags = dev->flags;
5079 int err; 5064 int err;
5080 5065
5081 err = __dev_set_promiscuity(dev, inc, true); 5066 err = __dev_set_promiscuity(dev, inc, true);
5082 if (err < 0) 5067 if (err < 0)
5083 return err; 5068 return err;
5084 if (dev->flags != old_flags) 5069 if (dev->flags != old_flags)
5085 dev_set_rx_mode(dev); 5070 dev_set_rx_mode(dev);
5086 return err; 5071 return err;
5087 } 5072 }
5088 EXPORT_SYMBOL(dev_set_promiscuity); 5073 EXPORT_SYMBOL(dev_set_promiscuity);
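A usage sketch (hypothetical packet-tap code, not from this patch): the +1/-1 pairing keeps the per-device promiscuity count balanced, and the RTNL lock is what __dev_set_promiscuity() asserts above.

static int my_tap_start(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_set_promiscuity(dev, 1);	/* enter promiscuous mode */
	rtnl_unlock();
	return err;
}

static void my_tap_stop(struct net_device *dev)
{
	rtnl_lock();
	dev_set_promiscuity(dev, -1);		/* drop our reference again */
	rtnl_unlock();
}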
5089 5074
5090 static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) 5075 static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
5091 { 5076 {
5092 unsigned int old_flags = dev->flags, old_gflags = dev->gflags; 5077 unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
5093 5078
5094 ASSERT_RTNL(); 5079 ASSERT_RTNL();
5095 5080
5096 dev->flags |= IFF_ALLMULTI; 5081 dev->flags |= IFF_ALLMULTI;
5097 dev->allmulti += inc; 5082 dev->allmulti += inc;
5098 if (dev->allmulti == 0) { 5083 if (dev->allmulti == 0) {
5099 /* 5084 /*
5100 * Avoid overflow. 5085 * Avoid overflow.
5101 * If inc causes overflow, untouch allmulti and return error. 5086 * If inc causes overflow, untouch allmulti and return error.
5102 */ 5087 */
5103 if (inc < 0) 5088 if (inc < 0)
5104 dev->flags &= ~IFF_ALLMULTI; 5089 dev->flags &= ~IFF_ALLMULTI;
5105 else { 5090 else {
5106 dev->allmulti -= inc; 5091 dev->allmulti -= inc;
5107 pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n", 5092 pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
5108 dev->name); 5093 dev->name);
5109 return -EOVERFLOW; 5094 return -EOVERFLOW;
5110 } 5095 }
5111 } 5096 }
5112 if (dev->flags ^ old_flags) { 5097 if (dev->flags ^ old_flags) {
5113 dev_change_rx_flags(dev, IFF_ALLMULTI); 5098 dev_change_rx_flags(dev, IFF_ALLMULTI);
5114 dev_set_rx_mode(dev); 5099 dev_set_rx_mode(dev);
5115 if (notify) 5100 if (notify)
5116 __dev_notify_flags(dev, old_flags, 5101 __dev_notify_flags(dev, old_flags,
5117 dev->gflags ^ old_gflags); 5102 dev->gflags ^ old_gflags);
5118 } 5103 }
5119 return 0; 5104 return 0;
5120 } 5105 }
5121 5106
5122 /** 5107 /**
5123 * dev_set_allmulti - update allmulti count on a device 5108 * dev_set_allmulti - update allmulti count on a device
5124 * @dev: device 5109 * @dev: device
5125 * @inc: modifier 5110 * @inc: modifier
5126 * 5111 *
5127 * Add or remove reception of all multicast frames to a device. While the 5112 * Add or remove reception of all multicast frames to a device. While the
5128 * count in the device remains above zero the interface remains listening 5113 * count in the device remains above zero the interface remains listening
5129 * to all interfaces. Once it hits zero the device reverts back to normal 5114 * to all interfaces. Once it hits zero the device reverts back to normal
5130 * filtering operation. A negative @inc value is used to drop the counter 5115 * filtering operation. A negative @inc value is used to drop the counter
5131 * when releasing a resource needing all multicasts. 5116 * when releasing a resource needing all multicasts.
5132 * Return 0 if successful or a negative errno code on error. 5117 * Return 0 if successful or a negative errno code on error.
5133 */ 5118 */
5134 5119
5135 int dev_set_allmulti(struct net_device *dev, int inc) 5120 int dev_set_allmulti(struct net_device *dev, int inc)
5136 { 5121 {
5137 return __dev_set_allmulti(dev, inc, true); 5122 return __dev_set_allmulti(dev, inc, true);
5138 } 5123 }
5139 EXPORT_SYMBOL(dev_set_allmulti); 5124 EXPORT_SYMBOL(dev_set_allmulti);
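The same counting pattern applies to allmulti, sketched here for a hypothetical multicast-routing user that needs every multicast frame while it is active:

static int my_mroute_start(struct net_device *dev)
{
	ASSERT_RTNL();			/* __dev_set_allmulti() asserts this */
	/* paired with dev_set_allmulti(dev, -1) on the stop path */
	return dev_set_allmulti(dev, 1);
}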
5140 5125
5141 /* 5126 /*
5142 * Upload unicast and multicast address lists to device and 5127 * Upload unicast and multicast address lists to device and
5143 * configure RX filtering. When the device doesn't support unicast 5128 * configure RX filtering. When the device doesn't support unicast
5144 * filtering it is put in promiscuous mode while unicast addresses 5129 * filtering it is put in promiscuous mode while unicast addresses
5145 * are present. 5130 * are present.
5146 */ 5131 */
5147 void __dev_set_rx_mode(struct net_device *dev) 5132 void __dev_set_rx_mode(struct net_device *dev)
5148 { 5133 {
5149 const struct net_device_ops *ops = dev->netdev_ops; 5134 const struct net_device_ops *ops = dev->netdev_ops;
5150 5135
5151 /* dev_open will call this function so the list will stay sane. */ 5136 /* dev_open will call this function so the list will stay sane. */
5152 if (!(dev->flags&IFF_UP)) 5137 if (!(dev->flags&IFF_UP))
5153 return; 5138 return;
5154 5139
5155 if (!netif_device_present(dev)) 5140 if (!netif_device_present(dev))
5156 return; 5141 return;
5157 5142
5158 if (!(dev->priv_flags & IFF_UNICAST_FLT)) { 5143 if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
5159 /* Unicast addresses changes may only happen under the rtnl, 5144 /* Unicast addresses changes may only happen under the rtnl,
5160 * therefore calling __dev_set_promiscuity here is safe. 5145 * therefore calling __dev_set_promiscuity here is safe.
5161 */ 5146 */
5162 if (!netdev_uc_empty(dev) && !dev->uc_promisc) { 5147 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
5163 __dev_set_promiscuity(dev, 1, false); 5148 __dev_set_promiscuity(dev, 1, false);
5164 dev->uc_promisc = true; 5149 dev->uc_promisc = true;
5165 } else if (netdev_uc_empty(dev) && dev->uc_promisc) { 5150 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
5166 __dev_set_promiscuity(dev, -1, false); 5151 __dev_set_promiscuity(dev, -1, false);
5167 dev->uc_promisc = false; 5152 dev->uc_promisc = false;
5168 } 5153 }
5169 } 5154 }
5170 5155
5171 if (ops->ndo_set_rx_mode) 5156 if (ops->ndo_set_rx_mode)
5172 ops->ndo_set_rx_mode(dev); 5157 ops->ndo_set_rx_mode(dev);
5173 } 5158 }
5174 5159
5175 void dev_set_rx_mode(struct net_device *dev) 5160 void dev_set_rx_mode(struct net_device *dev)
5176 { 5161 {
5177 netif_addr_lock_bh(dev); 5162 netif_addr_lock_bh(dev);
5178 __dev_set_rx_mode(dev); 5163 __dev_set_rx_mode(dev);
5179 netif_addr_unlock_bh(dev); 5164 netif_addr_unlock_bh(dev);
5180 } 5165 }
5181 5166
5182 /** 5167 /**
5183 * dev_get_flags - get flags reported to userspace 5168 * dev_get_flags - get flags reported to userspace
5184 * @dev: device 5169 * @dev: device
5185 * 5170 *
5186 * Get the combination of flag bits exported through APIs to userspace. 5171 * Get the combination of flag bits exported through APIs to userspace.
5187 */ 5172 */
5188 unsigned int dev_get_flags(const struct net_device *dev) 5173 unsigned int dev_get_flags(const struct net_device *dev)
5189 { 5174 {
5190 unsigned int flags; 5175 unsigned int flags;
5191 5176
5192 flags = (dev->flags & ~(IFF_PROMISC | 5177 flags = (dev->flags & ~(IFF_PROMISC |
5193 IFF_ALLMULTI | 5178 IFF_ALLMULTI |
5194 IFF_RUNNING | 5179 IFF_RUNNING |
5195 IFF_LOWER_UP | 5180 IFF_LOWER_UP |
5196 IFF_DORMANT)) | 5181 IFF_DORMANT)) |
5197 (dev->gflags & (IFF_PROMISC | 5182 (dev->gflags & (IFF_PROMISC |
5198 IFF_ALLMULTI)); 5183 IFF_ALLMULTI));
5199 5184
5200 if (netif_running(dev)) { 5185 if (netif_running(dev)) {
5201 if (netif_oper_up(dev)) 5186 if (netif_oper_up(dev))
5202 flags |= IFF_RUNNING; 5187 flags |= IFF_RUNNING;
5203 if (netif_carrier_ok(dev)) 5188 if (netif_carrier_ok(dev))
5204 flags |= IFF_LOWER_UP; 5189 flags |= IFF_LOWER_UP;
5205 if (netif_dormant(dev)) 5190 if (netif_dormant(dev))
5206 flags |= IFF_DORMANT; 5191 flags |= IFF_DORMANT;
5207 } 5192 }
5208 5193
5209 return flags; 5194 return flags;
5210 } 5195 }
5211 EXPORT_SYMBOL(dev_get_flags); 5196 EXPORT_SYMBOL(dev_get_flags);
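A small sketch of reading the userspace view of the flags, for example from a hypothetical debug helper; only dev_get_flags() and the IFF_* bits are real here.

static void my_report_flags(const struct net_device *dev)
{
	unsigned int flags = dev_get_flags(dev);

	pr_info("%s: flags 0x%x%s%s\n", dev->name, flags,
		flags & IFF_UP ? " up" : "",
		flags & IFF_RUNNING ? " running" : "");
}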
5212 5197
5213 int __dev_change_flags(struct net_device *dev, unsigned int flags) 5198 int __dev_change_flags(struct net_device *dev, unsigned int flags)
5214 { 5199 {
5215 unsigned int old_flags = dev->flags; 5200 unsigned int old_flags = dev->flags;
5216 int ret; 5201 int ret;
5217 5202
5218 ASSERT_RTNL(); 5203 ASSERT_RTNL();
5219 5204
5220 /* 5205 /*
5221 * Set the flags on our device. 5206 * Set the flags on our device.
5222 */ 5207 */
5223 5208
5224 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | 5209 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
5225 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL | 5210 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
5226 IFF_AUTOMEDIA)) | 5211 IFF_AUTOMEDIA)) |
5227 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC | 5212 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
5228 IFF_ALLMULTI)); 5213 IFF_ALLMULTI));
5229 5214
5230 /* 5215 /*
5231 * Load in the correct multicast list now the flags have changed. 5216 * Load in the correct multicast list now the flags have changed.
5232 */ 5217 */
5233 5218
5234 if ((old_flags ^ flags) & IFF_MULTICAST) 5219 if ((old_flags ^ flags) & IFF_MULTICAST)
5235 dev_change_rx_flags(dev, IFF_MULTICAST); 5220 dev_change_rx_flags(dev, IFF_MULTICAST);
5236 5221
5237 dev_set_rx_mode(dev); 5222 dev_set_rx_mode(dev);
5238 5223
5239 /* 5224 /*
5240 * Have we downed the interface. We handle IFF_UP ourselves 5225 * Have we downed the interface. We handle IFF_UP ourselves
5241 * according to user attempts to set it, rather than blindly 5226 * according to user attempts to set it, rather than blindly
5242 * setting it. 5227 * setting it.
5243 */ 5228 */
5244 5229
5245 ret = 0; 5230 ret = 0;
5246 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ 5231 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
5247 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev); 5232 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
5248 5233
5249 if (!ret) 5234 if (!ret)
5250 dev_set_rx_mode(dev); 5235 dev_set_rx_mode(dev);
5251 } 5236 }
5252 5237
5253 if ((flags ^ dev->gflags) & IFF_PROMISC) { 5238 if ((flags ^ dev->gflags) & IFF_PROMISC) {
5254 int inc = (flags & IFF_PROMISC) ? 1 : -1; 5239 int inc = (flags & IFF_PROMISC) ? 1 : -1;
5255 unsigned int old_flags = dev->flags; 5240 unsigned int old_flags = dev->flags;
5256 5241
5257 dev->gflags ^= IFF_PROMISC; 5242 dev->gflags ^= IFF_PROMISC;
5258 5243
5259 if (__dev_set_promiscuity(dev, inc, false) >= 0) 5244 if (__dev_set_promiscuity(dev, inc, false) >= 0)
5260 if (dev->flags != old_flags) 5245 if (dev->flags != old_flags)
5261 dev_set_rx_mode(dev); 5246 dev_set_rx_mode(dev);
5262 } 5247 }
5263 5248
5264 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI 5249 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
5265 is important. Some (broken) drivers set IFF_PROMISC, when 5250 is important. Some (broken) drivers set IFF_PROMISC, when
5266 IFF_ALLMULTI is requested not asking us and not reporting. 5251 IFF_ALLMULTI is requested not asking us and not reporting.
5267 */ 5252 */
5268 if ((flags ^ dev->gflags) & IFF_ALLMULTI) { 5253 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
5269 int inc = (flags & IFF_ALLMULTI) ? 1 : -1; 5254 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
5270 5255
5271 dev->gflags ^= IFF_ALLMULTI; 5256 dev->gflags ^= IFF_ALLMULTI;
5272 __dev_set_allmulti(dev, inc, false); 5257 __dev_set_allmulti(dev, inc, false);
5273 } 5258 }
5274 5259
5275 return ret; 5260 return ret;
5276 } 5261 }
5277 5262
5278 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, 5263 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
5279 unsigned int gchanges) 5264 unsigned int gchanges)
5280 { 5265 {
5281 unsigned int changes = dev->flags ^ old_flags; 5266 unsigned int changes = dev->flags ^ old_flags;
5282 5267
5283 if (gchanges) 5268 if (gchanges)
5284 rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC); 5269 rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
5285 5270
5286 if (changes & IFF_UP) { 5271 if (changes & IFF_UP) {
5287 if (dev->flags & IFF_UP) 5272 if (dev->flags & IFF_UP)
5288 call_netdevice_notifiers(NETDEV_UP, dev); 5273 call_netdevice_notifiers(NETDEV_UP, dev);
5289 else 5274 else
5290 call_netdevice_notifiers(NETDEV_DOWN, dev); 5275 call_netdevice_notifiers(NETDEV_DOWN, dev);
5291 } 5276 }
5292 5277
5293 if (dev->flags & IFF_UP && 5278 if (dev->flags & IFF_UP &&
5294 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) { 5279 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
5295 struct netdev_notifier_change_info change_info; 5280 struct netdev_notifier_change_info change_info;
5296 5281
5297 change_info.flags_changed = changes; 5282 change_info.flags_changed = changes;
5298 call_netdevice_notifiers_info(NETDEV_CHANGE, dev, 5283 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
5299 &change_info.info); 5284 &change_info.info);
5300 } 5285 }
5301 } 5286 }
5302 5287
5303 /** 5288 /**
5304 * dev_change_flags - change device settings 5289 * dev_change_flags - change device settings
5305 * @dev: device 5290 * @dev: device
5306 * @flags: device state flags 5291 * @flags: device state flags
5307 * 5292 *
5308 * Change settings on device based state flags. The flags are 5293 * Change settings on device based state flags. The flags are
5309 * in the userspace exported format. 5294 * in the userspace exported format.
5310 */ 5295 */
5311 int dev_change_flags(struct net_device *dev, unsigned int flags) 5296 int dev_change_flags(struct net_device *dev, unsigned int flags)
5312 { 5297 {
5313 int ret; 5298 int ret;
5314 unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags; 5299 unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
5315 5300
5316 ret = __dev_change_flags(dev, flags); 5301 ret = __dev_change_flags(dev, flags);
5317 if (ret < 0) 5302 if (ret < 0)
5318 return ret; 5303 return ret;
5319 5304
5320 changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags); 5305 changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags);
5321 __dev_notify_flags(dev, old_flags, changes); 5306 __dev_notify_flags(dev, old_flags, changes);
5322 return ret; 5307 return ret;
5323 } 5308 }
5324 EXPORT_SYMBOL(dev_change_flags); 5309 EXPORT_SYMBOL(dev_change_flags);
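Putting dev_get_flags() and dev_change_flags() together, a hypothetical in-kernel equivalent of "ip link set <name> up"; the lookup and error handling are illustrative, the RTNL locking is required.

static int my_bring_up(struct net *net, const char *name)
{
	struct net_device *dev;
	int err = -ENODEV;

	rtnl_lock();
	dev = __dev_get_by_name(net, name);	/* valid while RTNL is held */
	if (dev)
		err = dev_change_flags(dev, dev_get_flags(dev) | IFF_UP);
	rtnl_unlock();
	return err;
}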
5325 5310
5326 /** 5311 /**
5327 * dev_set_mtu - Change maximum transfer unit 5312 * dev_set_mtu - Change maximum transfer unit
5328 * @dev: device 5313 * @dev: device
5329 * @new_mtu: new transfer unit 5314 * @new_mtu: new transfer unit
5330 * 5315 *
5331 * Change the maximum transfer size of the network device. 5316 * Change the maximum transfer size of the network device.
5332 */ 5317 */
5333 int dev_set_mtu(struct net_device *dev, int new_mtu) 5318 int dev_set_mtu(struct net_device *dev, int new_mtu)
5334 { 5319 {
5335 const struct net_device_ops *ops = dev->netdev_ops; 5320 const struct net_device_ops *ops = dev->netdev_ops;
5336 int err; 5321 int err;
5337 5322
5338 if (new_mtu == dev->mtu) 5323 if (new_mtu == dev->mtu)
5339 return 0; 5324 return 0;
5340 5325
5341 /* MTU must be positive. */ 5326 /* MTU must be positive. */
5342 if (new_mtu < 0) 5327 if (new_mtu < 0)
5343 return -EINVAL; 5328 return -EINVAL;
5344 5329
5345 if (!netif_device_present(dev)) 5330 if (!netif_device_present(dev))
5346 return -ENODEV; 5331 return -ENODEV;
5347 5332
5348 err = 0; 5333 err = 0;
5349 if (ops->ndo_change_mtu) 5334 if (ops->ndo_change_mtu)
5350 err = ops->ndo_change_mtu(dev, new_mtu); 5335 err = ops->ndo_change_mtu(dev, new_mtu);
5351 else 5336 else
5352 dev->mtu = new_mtu; 5337 dev->mtu = new_mtu;
5353 5338
5354 if (!err) 5339 if (!err)
5355 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); 5340 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
5356 return err; 5341 return err;
5357 } 5342 }
5358 EXPORT_SYMBOL(dev_set_mtu); 5343 EXPORT_SYMBOL(dev_set_mtu);
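A sketch of a typical caller, say a hypothetical tunnel setup path reserving room for its encapsulation header; callers are expected to hold RTNL, and a successful change raises NETDEV_CHANGEMTU as above.

static int my_shrink_mtu(struct net_device *dev, int encap_overhead)
{
	ASSERT_RTNL();
	return dev_set_mtu(dev, dev->mtu - encap_overhead);
}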
5359 5344
5360 /** 5345 /**
5361 * dev_set_group - Change group this device belongs to 5346 * dev_set_group - Change group this device belongs to
5362 * @dev: device 5347 * @dev: device
5363 * @new_group: group this device should belong to 5348 * @new_group: group this device should belong to
5364 */ 5349 */
5365 void dev_set_group(struct net_device *dev, int new_group) 5350 void dev_set_group(struct net_device *dev, int new_group)
5366 { 5351 {
5367 dev->group = new_group; 5352 dev->group = new_group;
5368 } 5353 }
5369 EXPORT_SYMBOL(dev_set_group); 5354 EXPORT_SYMBOL(dev_set_group);
5370 5355
5371 /** 5356 /**
5372 * dev_set_mac_address - Change Media Access Control Address 5357 * dev_set_mac_address - Change Media Access Control Address
5373 * @dev: device 5358 * @dev: device
5374 * @sa: new address 5359 * @sa: new address
5375 * 5360 *
5376 * Change the hardware (MAC) address of the device 5361 * Change the hardware (MAC) address of the device
5377 */ 5362 */
5378 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) 5363 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
5379 { 5364 {
5380 const struct net_device_ops *ops = dev->netdev_ops; 5365 const struct net_device_ops *ops = dev->netdev_ops;
5381 int err; 5366 int err;
5382 5367
5383 if (!ops->ndo_set_mac_address) 5368 if (!ops->ndo_set_mac_address)
5384 return -EOPNOTSUPP; 5369 return -EOPNOTSUPP;
5385 if (sa->sa_family != dev->type) 5370 if (sa->sa_family != dev->type)
5386 return -EINVAL; 5371 return -EINVAL;
5387 if (!netif_device_present(dev)) 5372 if (!netif_device_present(dev))
5388 return -ENODEV; 5373 return -ENODEV;
5389 err = ops->ndo_set_mac_address(dev, sa); 5374 err = ops->ndo_set_mac_address(dev, sa);
5390 if (err) 5375 if (err)
5391 return err; 5376 return err;
5392 dev->addr_assign_type = NET_ADDR_SET; 5377 dev->addr_assign_type = NET_ADDR_SET;
5393 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); 5378 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
5394 add_device_randomness(dev->dev_addr, dev->addr_len); 5379 add_device_randomness(dev->dev_addr, dev->addr_len);
5395 return 0; 5380 return 0;
5396 } 5381 }
5397 EXPORT_SYMBOL(dev_set_mac_address); 5382 EXPORT_SYMBOL(dev_set_mac_address);
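A sketch of building the struct sockaddr that dev_set_mac_address() expects; the helper name and address source are illustrative, and sa_family must match dev->type exactly as checked above.

static int my_set_mac(struct net_device *dev, const u8 *addr)
{
	struct sockaddr sa;

	sa.sa_family = dev->type;	/* e.g. ARPHRD_ETHER */
	/* assumes addr_len fits in sa.sa_data, which holds for Ethernet */
	memcpy(sa.sa_data, addr, dev->addr_len);

	ASSERT_RTNL();
	return dev_set_mac_address(dev, &sa);
}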
5398 5383
5399 /** 5384 /**
5400 * dev_change_carrier - Change device carrier 5385 * dev_change_carrier - Change device carrier
5401 * @dev: device 5386 * @dev: device
5402 * @new_carrier: new value 5387 * @new_carrier: new value
5403 * 5388 *
5404 * Change device carrier 5389 * Change device carrier
5405 */ 5390 */
5406 int dev_change_carrier(struct net_device *dev, bool new_carrier) 5391 int dev_change_carrier(struct net_device *dev, bool new_carrier)
5407 { 5392 {
5408 const struct net_device_ops *ops = dev->netdev_ops; 5393 const struct net_device_ops *ops = dev->netdev_ops;
5409 5394
5410 if (!ops->ndo_change_carrier) 5395 if (!ops->ndo_change_carrier)
5411 return -EOPNOTSUPP; 5396 return -EOPNOTSUPP;
5412 if (!netif_device_present(dev)) 5397 if (!netif_device_present(dev))
5413 return -ENODEV; 5398 return -ENODEV;
5414 return ops->ndo_change_carrier(dev, new_carrier); 5399 return ops->ndo_change_carrier(dev, new_carrier);
5415 } 5400 }
5416 EXPORT_SYMBOL(dev_change_carrier); 5401 EXPORT_SYMBOL(dev_change_carrier);
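A brief sketch: forcing carrier state from a hypothetical management hook, mirroring how the rtnetlink path invokes this under RTNL; drivers without ndo_change_carrier make it return -EOPNOTSUPP.

static int my_force_carrier(struct net_device *dev, bool on)
{
	ASSERT_RTNL();
	return dev_change_carrier(dev, on);
}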
5417 5402
5418 /** 5403 /**
5419 * dev_get_phys_port_id - Get device physical port ID 5404 * dev_get_phys_port_id - Get device physical port ID
5420 * @dev: device 5405 * @dev: device
5421 * @ppid: port ID 5406 * @ppid: port ID
5422 * 5407 *
5423 * Get device physical port ID 5408 * Get device physical port ID
5424 */ 5409 */
5425 int dev_get_phys_port_id(struct net_device *dev, 5410 int dev_get_phys_port_id(struct net_device *dev,
5426 struct netdev_phys_port_id *ppid) 5411 struct netdev_phys_port_id *ppid)
5427 { 5412 {
5428 const struct net_device_ops *ops = dev->netdev_ops; 5413 const struct net_device_ops *ops = dev->netdev_ops;
5429 5414
5430 if (!ops->ndo_get_phys_port_id) 5415 if (!ops->ndo_get_phys_port_id)
5431 return -EOPNOTSUPP; 5416 return -EOPNOTSUPP;
5432 return ops->ndo_get_phys_port_id(dev, ppid); 5417 return ops->ndo_get_phys_port_id(dev, ppid);
5433 } 5418 }
5434 EXPORT_SYMBOL(dev_get_phys_port_id); 5419 EXPORT_SYMBOL(dev_get_phys_port_id);
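A sketch of querying and logging the physical port ID; struct netdev_phys_port_id and the %*phN hex format are existing kernel facilities, while the helper name is made up.

static void my_show_phys_port(struct net_device *dev)
{
	struct netdev_phys_port_id ppid;

	if (!dev_get_phys_port_id(dev, &ppid))
		netdev_info(dev, "phys port id %*phN\n",
			    ppid.id_len, ppid.id);
}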
5435 5420
5436 /** 5421 /**
5437 * dev_new_index - allocate an ifindex 5422 * dev_new_index - allocate an ifindex
5438 * @net: the applicable net namespace 5423 * @net: the applicable net namespace
5439 * 5424 *
5440 * Returns a suitable unique value for a new device interface 5425 * Returns a suitable unique value for a new device interface
5441 * number. The caller must hold the rtnl semaphore or the 5426 * number. The caller must hold the rtnl semaphore or the
5442 * dev_base_lock to be sure it remains unique. 5427 * dev_base_lock to be sure it remains unique.
5443 */ 5428 */
5444 static int dev_new_index(struct net *net) 5429 static int dev_new_index(struct net *net)
5445 { 5430 {
5446 int ifindex = net->ifindex; 5431 int ifindex = net->ifindex;
5447 for (;;) { 5432 for (;;) {
5448 if (++ifindex <= 0) 5433 if (++ifindex <= 0)
5449 ifindex = 1; 5434 ifindex = 1;
5450 if (!__dev_get_by_index(net, ifindex)) 5435 if (!__dev_get_by_index(net, ifindex))
5451 return net->ifindex = ifindex; 5436 return net->ifindex = ifindex;
5452 } 5437 }
5453 } 5438 }
5454 5439
5455 /* Delayed registration/unregisteration */ 5440 /* Delayed registration/unregisteration */
5456 static LIST_HEAD(net_todo_list); 5441 static LIST_HEAD(net_todo_list);
5457 static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); 5442 static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
5458 5443
5459 static void net_set_todo(struct net_device *dev) 5444 static void net_set_todo(struct net_device *dev)
5460 { 5445 {
5461 list_add_tail(&dev->todo_list, &net_todo_list); 5446 list_add_tail(&dev->todo_list, &net_todo_list);
5462 dev_net(dev)->dev_unreg_count++; 5447 dev_net(dev)->dev_unreg_count++;
5463 } 5448 }
5464 5449
5465 static void rollback_registered_many(struct list_head *head) 5450 static void rollback_registered_many(struct list_head *head)
5466 { 5451 {
5467 struct net_device *dev, *tmp; 5452 struct net_device *dev, *tmp;
5468 LIST_HEAD(close_head); 5453 LIST_HEAD(close_head);
5469 5454
5470 BUG_ON(dev_boot_phase); 5455 BUG_ON(dev_boot_phase);
5471 ASSERT_RTNL(); 5456 ASSERT_RTNL();
5472 5457
5473 list_for_each_entry_safe(dev, tmp, head, unreg_list) { 5458 list_for_each_entry_safe(dev, tmp, head, unreg_list) {
5474 /* Some devices call without registering 5459 /* Some devices call without registering
5475 * for initialization unwind. Remove those 5460 * for initialization unwind. Remove those
5476 * devices and proceed with the remaining. 5461 * devices and proceed with the remaining.
5477 */ 5462 */
5478 if (dev->reg_state == NETREG_UNINITIALIZED) { 5463 if (dev->reg_state == NETREG_UNINITIALIZED) {
5479 pr_debug("unregister_netdevice: device %s/%p never was registered\n", 5464 pr_debug("unregister_netdevice: device %s/%p never was registered\n",
5480 dev->name, dev); 5465 dev->name, dev);
5481 5466
5482 WARN_ON(1); 5467 WARN_ON(1);
5483 list_del(&dev->unreg_list); 5468 list_del(&dev->unreg_list);
5484 continue; 5469 continue;
5485 } 5470 }
5486 dev->dismantle = true; 5471 dev->dismantle = true;
5487 BUG_ON(dev->reg_state != NETREG_REGISTERED); 5472 BUG_ON(dev->reg_state != NETREG_REGISTERED);
5488 } 5473 }
5489 5474
5490 /* If device is running, close it first. */ 5475 /* If device is running, close it first. */
5491 list_for_each_entry(dev, head, unreg_list) 5476 list_for_each_entry(dev, head, unreg_list)
5492 list_add_tail(&dev->close_list, &close_head); 5477 list_add_tail(&dev->close_list, &close_head);
5493 dev_close_many(&close_head); 5478 dev_close_many(&close_head);
5494 5479
5495 list_for_each_entry(dev, head, unreg_list) { 5480 list_for_each_entry(dev, head, unreg_list) {
5496 /* And unlink it from device chain. */ 5481 /* And unlink it from device chain. */
5497 unlist_netdevice(dev); 5482 unlist_netdevice(dev);
5498 5483
5499 dev->reg_state = NETREG_UNREGISTERING; 5484 dev->reg_state = NETREG_UNREGISTERING;
5500 } 5485 }
5501 5486
5502 synchronize_net(); 5487 synchronize_net();
5503 5488
5504 list_for_each_entry(dev, head, unreg_list) { 5489 list_for_each_entry(dev, head, unreg_list) {
5505 /* Shutdown queueing discipline. */ 5490 /* Shutdown queueing discipline. */
5506 dev_shutdown(dev); 5491 dev_shutdown(dev);
5507 5492
5508 5493
5509 /* Notify protocols, that we are about to destroy 5494 /* Notify protocols, that we are about to destroy
5510 this device. They should clean all the things. 5495 this device. They should clean all the things.
5511 */ 5496 */
5512 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 5497 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5513 5498
5514 if (!dev->rtnl_link_ops || 5499 if (!dev->rtnl_link_ops ||
5515 dev->rtnl_link_state == RTNL_LINK_INITIALIZED) 5500 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5516 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); 5501 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
5517 5502
5518 /* 5503 /*
5519 * Flush the unicast and multicast chains 5504 * Flush the unicast and multicast chains
5520 */ 5505 */
5521 dev_uc_flush(dev); 5506 dev_uc_flush(dev);
5522 dev_mc_flush(dev); 5507 dev_mc_flush(dev);
5523 5508
5524 if (dev->netdev_ops->ndo_uninit) 5509 if (dev->netdev_ops->ndo_uninit)
5525 dev->netdev_ops->ndo_uninit(dev); 5510 dev->netdev_ops->ndo_uninit(dev);
5526 5511
5527 /* Notifier chain MUST detach us all upper devices. */ 5512 /* Notifier chain MUST detach us all upper devices. */
5528 WARN_ON(netdev_has_any_upper_dev(dev)); 5513 WARN_ON(netdev_has_any_upper_dev(dev));
5529 5514
5530 /* Remove entries from kobject tree */ 5515 /* Remove entries from kobject tree */
5531 netdev_unregister_kobject(dev); 5516 netdev_unregister_kobject(dev);
5532 #ifdef CONFIG_XPS 5517 #ifdef CONFIG_XPS
5533 /* Remove XPS queueing entries */ 5518 /* Remove XPS queueing entries */
5534 netif_reset_xps_queues_gt(dev, 0); 5519 netif_reset_xps_queues_gt(dev, 0);
5535 #endif 5520 #endif
5536 } 5521 }
5537 5522
5538 synchronize_net(); 5523 synchronize_net();
5539 5524
5540 list_for_each_entry(dev, head, unreg_list) 5525 list_for_each_entry(dev, head, unreg_list)
5541 dev_put(dev); 5526 dev_put(dev);
5542 } 5527 }
5543 5528
5544 static void rollback_registered(struct net_device *dev) 5529 static void rollback_registered(struct net_device *dev)
5545 { 5530 {
5546 LIST_HEAD(single); 5531 LIST_HEAD(single);
5547 5532
5548 list_add(&dev->unreg_list, &single); 5533 list_add(&dev->unreg_list, &single);
5549 rollback_registered_many(&single); 5534 rollback_registered_many(&single);
5550 list_del(&single); 5535 list_del(&single);
5551 } 5536 }
5552 5537
5553 static netdev_features_t netdev_fix_features(struct net_device *dev, 5538 static netdev_features_t netdev_fix_features(struct net_device *dev,
5554 netdev_features_t features) 5539 netdev_features_t features)
5555 { 5540 {
5556 /* Fix illegal checksum combinations */ 5541 /* Fix illegal checksum combinations */
5557 if ((features & NETIF_F_HW_CSUM) && 5542 if ((features & NETIF_F_HW_CSUM) &&
5558 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 5543 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5559 netdev_warn(dev, "mixed HW and IP checksum settings.\n"); 5544 netdev_warn(dev, "mixed HW and IP checksum settings.\n");
5560 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); 5545 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5561 } 5546 }
5562 5547
5563 /* TSO requires that SG is present as well. */ 5548 /* TSO requires that SG is present as well. */
5564 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { 5549 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
5565 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n"); 5550 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
5566 features &= ~NETIF_F_ALL_TSO; 5551 features &= ~NETIF_F_ALL_TSO;
5567 } 5552 }
5568 5553
5569 if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) && 5554 if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
5570 !(features & NETIF_F_IP_CSUM)) { 5555 !(features & NETIF_F_IP_CSUM)) {
5571 netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n"); 5556 netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
5572 features &= ~NETIF_F_TSO; 5557 features &= ~NETIF_F_TSO;
5573 features &= ~NETIF_F_TSO_ECN; 5558 features &= ~NETIF_F_TSO_ECN;
5574 } 5559 }
5575 5560
5576 if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) && 5561 if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
5577 !(features & NETIF_F_IPV6_CSUM)) { 5562 !(features & NETIF_F_IPV6_CSUM)) {
5578 netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n"); 5563 netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
5579 features &= ~NETIF_F_TSO6; 5564 features &= ~NETIF_F_TSO6;
5580 } 5565 }
5581 5566
5582 /* TSO ECN requires that TSO is present as well. */ 5567 /* TSO ECN requires that TSO is present as well. */
5583 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) 5568 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
5584 features &= ~NETIF_F_TSO_ECN; 5569 features &= ~NETIF_F_TSO_ECN;
5585 5570
5586 /* Software GSO depends on SG. */ 5571 /* Software GSO depends on SG. */
5587 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { 5572 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
5588 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); 5573 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
5589 features &= ~NETIF_F_GSO; 5574 features &= ~NETIF_F_GSO;
5590 } 5575 }
5591 5576
5592 /* UFO needs SG and checksumming */ 5577 /* UFO needs SG and checksumming */
5593 if (features & NETIF_F_UFO) { 5578 if (features & NETIF_F_UFO) {
5594 /* maybe split UFO into V4 and V6? */ 5579 /* maybe split UFO into V4 and V6? */
5595 if (!((features & NETIF_F_GEN_CSUM) || 5580 if (!((features & NETIF_F_GEN_CSUM) ||
5596 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) 5581 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
5597 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 5582 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5598 netdev_dbg(dev, 5583 netdev_dbg(dev,
5599 "Dropping NETIF_F_UFO since no checksum offload features.\n"); 5584 "Dropping NETIF_F_UFO since no checksum offload features.\n");
5600 features &= ~NETIF_F_UFO; 5585 features &= ~NETIF_F_UFO;
5601 } 5586 }
5602 5587
5603 if (!(features & NETIF_F_SG)) { 5588 if (!(features & NETIF_F_SG)) {
5604 netdev_dbg(dev, 5589 netdev_dbg(dev,
5605 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); 5590 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
5606 features &= ~NETIF_F_UFO; 5591 features &= ~NETIF_F_UFO;
5607 } 5592 }
5608 } 5593 }
5609 5594
5610 return features; 5595 return features;
5611 } 5596 }
5612 5597
5613 int __netdev_update_features(struct net_device *dev) 5598 int __netdev_update_features(struct net_device *dev)
5614 { 5599 {
5615 netdev_features_t features; 5600 netdev_features_t features;
5616 int err = 0; 5601 int err = 0;
5617 5602
5618 ASSERT_RTNL(); 5603 ASSERT_RTNL();
5619 5604
5620 features = netdev_get_wanted_features(dev); 5605 features = netdev_get_wanted_features(dev);
5621 5606
5622 if (dev->netdev_ops->ndo_fix_features) 5607 if (dev->netdev_ops->ndo_fix_features)
5623 features = dev->netdev_ops->ndo_fix_features(dev, features); 5608 features = dev->netdev_ops->ndo_fix_features(dev, features);
5624 5609
5625 /* driver might be less strict about feature dependencies */ 5610 /* driver might be less strict about feature dependencies */
5626 features = netdev_fix_features(dev, features); 5611 features = netdev_fix_features(dev, features);
5627 5612
5628 if (dev->features == features) 5613 if (dev->features == features)
5629 return 0; 5614 return 0;
5630 5615
5631 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n", 5616 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
5632 &dev->features, &features); 5617 &dev->features, &features);
5633 5618
5634 if (dev->netdev_ops->ndo_set_features) 5619 if (dev->netdev_ops->ndo_set_features)
5635 err = dev->netdev_ops->ndo_set_features(dev, features); 5620 err = dev->netdev_ops->ndo_set_features(dev, features);
5636 5621
5637 if (unlikely(err < 0)) { 5622 if (unlikely(err < 0)) {
5638 netdev_err(dev, 5623 netdev_err(dev,
5639 "set_features() failed (%d); wanted %pNF, left %pNF\n", 5624 "set_features() failed (%d); wanted %pNF, left %pNF\n",
5640 err, &features, &dev->features); 5625 err, &features, &dev->features);
5641 return -1; 5626 return -1;
5642 } 5627 }
5643 5628
5644 if (!err) 5629 if (!err)
5645 dev->features = features; 5630 dev->features = features;
5646 5631
5647 return 1; 5632 return 1;
5648 } 5633 }
5649 5634
5650 /** 5635 /**
5651 * netdev_update_features - recalculate device features 5636 * netdev_update_features - recalculate device features
5652 * @dev: the device to check 5637 * @dev: the device to check
5653 * 5638 *
5654 * Recalculate dev->features set and send notifications if it 5639 * Recalculate dev->features set and send notifications if it
5655 * has changed. Should be called after driver or hardware dependent 5640 * has changed. Should be called after driver or hardware dependent
5656 * conditions might have changed that influence the features. 5641 * conditions might have changed that influence the features.
5657 */ 5642 */
5658 void netdev_update_features(struct net_device *dev) 5643 void netdev_update_features(struct net_device *dev)
5659 { 5644 {
5660 if (__netdev_update_features(dev)) 5645 if (__netdev_update_features(dev))
5661 netdev_features_change(dev); 5646 netdev_features_change(dev);
5662 } 5647 }
5663 EXPORT_SYMBOL(netdev_update_features); 5648 EXPORT_SYMBOL(netdev_update_features);
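A sketch of the intended call site: a hypothetical driver that just learned it can no longer offload TSO re-runs the feature negotiation; __netdev_update_features() asserts RTNL, so the caller must hold it.

static void my_disable_tso(struct net_device *dev)
{
	ASSERT_RTNL();
	dev->hw_features &= ~NETIF_F_ALL_TSO;	/* hardware lost the capability */
	netdev_update_features(dev);		/* recompute and notify if changed */
}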
5664 5649
5665 /** 5650 /**
5666 * netdev_change_features - recalculate device features 5651 * netdev_change_features - recalculate device features
5667 * @dev: the device to check 5652 * @dev: the device to check
5668 * 5653 *
5669 * Recalculate dev->features set and send notifications even 5654 * Recalculate dev->features set and send notifications even
5670 * if they have not changed. Should be called instead of 5655 * if they have not changed. Should be called instead of
5671 * netdev_update_features() if also dev->vlan_features might 5656 * netdev_update_features() if also dev->vlan_features might
5672 * have changed to allow the changes to be propagated to stacked 5657 * have changed to allow the changes to be propagated to stacked
5673 * VLAN devices. 5658 * VLAN devices.
5674 */ 5659 */
5675 void netdev_change_features(struct net_device *dev) 5660 void netdev_change_features(struct net_device *dev)
5676 { 5661 {
5677 __netdev_update_features(dev); 5662 __netdev_update_features(dev);
5678 netdev_features_change(dev); 5663 netdev_features_change(dev);
5679 } 5664 }
5680 EXPORT_SYMBOL(netdev_change_features); 5665 EXPORT_SYMBOL(netdev_change_features);
5681 5666
5682 /** 5667 /**
5683 * netif_stacked_transfer_operstate - transfer operstate 5668 * netif_stacked_transfer_operstate - transfer operstate
5684 * @rootdev: the root or lower level device to transfer state from 5669 * @rootdev: the root or lower level device to transfer state from
5685 * @dev: the device to transfer operstate to 5670 * @dev: the device to transfer operstate to
5686 * 5671 *
5687 * Transfer operational state from root to device. This is normally 5672 * Transfer operational state from root to device. This is normally
5688 * called when a stacking relationship exists between the root 5673 * called when a stacking relationship exists between the root
5689 * device and the device (a leaf device). 5674 * device and the device (a leaf device).
5690 */ 5675 */
5691 void netif_stacked_transfer_operstate(const struct net_device *rootdev, 5676 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
5692 struct net_device *dev) 5677 struct net_device *dev)
5693 { 5678 {
5694 if (rootdev->operstate == IF_OPER_DORMANT) 5679 if (rootdev->operstate == IF_OPER_DORMANT)
5695 netif_dormant_on(dev); 5680 netif_dormant_on(dev);
5696 else 5681 else
5697 netif_dormant_off(dev); 5682 netif_dormant_off(dev);
5698 5683
5699 if (netif_carrier_ok(rootdev)) { 5684 if (netif_carrier_ok(rootdev)) {
5700 if (!netif_carrier_ok(dev)) 5685 if (!netif_carrier_ok(dev))
5701 netif_carrier_on(dev); 5686 netif_carrier_on(dev);
5702 } else { 5687 } else {
5703 if (netif_carrier_ok(dev)) 5688 if (netif_carrier_ok(dev))
5704 netif_carrier_off(dev); 5689 netif_carrier_off(dev);
5705 } 5690 }
5706 } 5691 }
5707 EXPORT_SYMBOL(netif_stacked_transfer_operstate); 5692 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
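A sketch of where this is typically called: a hypothetical stacked driver mirroring its lower device's state from a netdevice notifier. Notifiers run under RTNL, so looking up the master upper device with netdev_master_upper_dev_get() is safe here; the notifier wiring itself is illustrative.

static int my_stacked_event(struct notifier_block *nb,
			    unsigned long event, void *ptr)
{
	struct net_device *lower = netdev_notifier_info_to_dev(ptr);
	struct net_device *upper = netdev_master_upper_dev_get(lower);

	if (upper && event == NETDEV_CHANGE)
		netif_stacked_transfer_operstate(lower, upper);
	return NOTIFY_DONE;
}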
5708 5693
5709 #ifdef CONFIG_RPS 5694 #ifdef CONFIG_RPS
5710 static int netif_alloc_rx_queues(struct net_device *dev) 5695 static int netif_alloc_rx_queues(struct net_device *dev)
5711 { 5696 {
5712 unsigned int i, count = dev->num_rx_queues; 5697 unsigned int i, count = dev->num_rx_queues;
5713 struct netdev_rx_queue *rx; 5698 struct netdev_rx_queue *rx;
5714 5699
5715 BUG_ON(count < 1); 5700 BUG_ON(count < 1);
5716 5701
5717 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); 5702 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5718 if (!rx) 5703 if (!rx)
5719 return -ENOMEM; 5704 return -ENOMEM;
5720 5705
5721 dev->_rx = rx; 5706 dev->_rx = rx;
5722 5707
5723 for (i = 0; i < count; i++) 5708 for (i = 0; i < count; i++)
5724 rx[i].dev = dev; 5709 rx[i].dev = dev;
5725 return 0; 5710 return 0;
5726 } 5711 }
5727 #endif 5712 #endif
5728 5713
5729 static void netdev_init_one_queue(struct net_device *dev, 5714 static void netdev_init_one_queue(struct net_device *dev,
5730 struct netdev_queue *queue, void *_unused) 5715 struct netdev_queue *queue, void *_unused)
5731 { 5716 {
5732 /* Initialize queue lock */ 5717 /* Initialize queue lock */
5733 spin_lock_init(&queue->_xmit_lock); 5718 spin_lock_init(&queue->_xmit_lock);
5734 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); 5719 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
5735 queue->xmit_lock_owner = -1; 5720 queue->xmit_lock_owner = -1;
5736 netdev_queue_numa_node_write(queue, NUMA_NO_NODE); 5721 netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
5737 queue->dev = dev; 5722 queue->dev = dev;
5738 #ifdef CONFIG_BQL 5723 #ifdef CONFIG_BQL
5739 dql_init(&queue->dql, HZ); 5724 dql_init(&queue->dql, HZ);
5740 #endif 5725 #endif
5741 } 5726 }
5742 5727
5743 static void netif_free_tx_queues(struct net_device *dev) 5728 static void netif_free_tx_queues(struct net_device *dev)
5744 { 5729 {
5745 if (is_vmalloc_addr(dev->_tx)) 5730 if (is_vmalloc_addr(dev->_tx))
5746 vfree(dev->_tx); 5731 vfree(dev->_tx);
5747 else 5732 else
5748 kfree(dev->_tx); 5733 kfree(dev->_tx);
5749 } 5734 }
5750 5735
5751 static int netif_alloc_netdev_queues(struct net_device *dev) 5736 static int netif_alloc_netdev_queues(struct net_device *dev)
5752 { 5737 {
5753 unsigned int count = dev->num_tx_queues; 5738 unsigned int count = dev->num_tx_queues;
5754 struct netdev_queue *tx; 5739 struct netdev_queue *tx;
5755 size_t sz = count * sizeof(*tx); 5740 size_t sz = count * sizeof(*tx);
5756 5741
5757 BUG_ON(count < 1 || count > 0xffff); 5742 BUG_ON(count < 1 || count > 0xffff);
5758 5743
5759 tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); 5744 tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
5760 if (!tx) { 5745 if (!tx) {
5761 tx = vzalloc(sz); 5746 tx = vzalloc(sz);
5762 if (!tx) 5747 if (!tx)
5763 return -ENOMEM; 5748 return -ENOMEM;
5764 } 5749 }
5765 dev->_tx = tx; 5750 dev->_tx = tx;
5766 5751
5767 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); 5752 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5768 spin_lock_init(&dev->tx_global_lock); 5753 spin_lock_init(&dev->tx_global_lock);
5769 5754
5770 return 0; 5755 return 0;
5771 } 5756 }
5772 5757
5773 /** 5758 /**
5774 * register_netdevice - register a network device 5759 * register_netdevice - register a network device
5775 * @dev: device to register 5760 * @dev: device to register
5776 * 5761 *
5777 * Take a completed network device structure and add it to the kernel 5762 * Take a completed network device structure and add it to the kernel
5778 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier 5763 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5779 * chain. 0 is returned on success. A negative errno code is returned 5764 * chain. 0 is returned on success. A negative errno code is returned
5780 * on a failure to set up the device, or if the name is a duplicate. 5765 * on a failure to set up the device, or if the name is a duplicate.
5781 * 5766 *
5782 * Callers must hold the rtnl semaphore. You may want 5767 * Callers must hold the rtnl semaphore. You may want
5783 * register_netdev() instead of this. 5768 * register_netdev() instead of this.
5784 * 5769 *
5785 * BUGS: 5770 * BUGS:
5786 * The locking appears insufficient to guarantee two parallel registers 5771 * The locking appears insufficient to guarantee two parallel registers
5787 * will not get the same name. 5772 * will not get the same name.
5788 */ 5773 */
5789 5774
5790 int register_netdevice(struct net_device *dev) 5775 int register_netdevice(struct net_device *dev)
5791 { 5776 {
5792 int ret; 5777 int ret;
5793 struct net *net = dev_net(dev); 5778 struct net *net = dev_net(dev);
5794 5779
5795 BUG_ON(dev_boot_phase); 5780 BUG_ON(dev_boot_phase);
5796 ASSERT_RTNL(); 5781 ASSERT_RTNL();
5797 5782
5798 might_sleep(); 5783 might_sleep();
5799 5784
5800 /* When net_device's are persistent, this will be fatal. */ 5785 /* When net_device's are persistent, this will be fatal. */
5801 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); 5786 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
5802 BUG_ON(!net); 5787 BUG_ON(!net);
5803 5788
5804 spin_lock_init(&dev->addr_list_lock); 5789 spin_lock_init(&dev->addr_list_lock);
5805 netdev_set_addr_lockdep_class(dev); 5790 netdev_set_addr_lockdep_class(dev);
5806 5791
5807 dev->iflink = -1; 5792 dev->iflink = -1;
5808 5793
5809 ret = dev_get_valid_name(net, dev, dev->name); 5794 ret = dev_get_valid_name(net, dev, dev->name);
5810 if (ret < 0) 5795 if (ret < 0)
5811 goto out; 5796 goto out;
5812 5797
5813 /* Init, if this function is available */ 5798 /* Init, if this function is available */
5814 if (dev->netdev_ops->ndo_init) { 5799 if (dev->netdev_ops->ndo_init) {
5815 ret = dev->netdev_ops->ndo_init(dev); 5800 ret = dev->netdev_ops->ndo_init(dev);
5816 if (ret) { 5801 if (ret) {
5817 if (ret > 0) 5802 if (ret > 0)
5818 ret = -EIO; 5803 ret = -EIO;
5819 goto out; 5804 goto out;
5820 } 5805 }
5821 } 5806 }
5822 5807
5823 if (((dev->hw_features | dev->features) & 5808 if (((dev->hw_features | dev->features) &
5824 NETIF_F_HW_VLAN_CTAG_FILTER) && 5809 NETIF_F_HW_VLAN_CTAG_FILTER) &&
5825 (!dev->netdev_ops->ndo_vlan_rx_add_vid || 5810 (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
5826 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) { 5811 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
5827 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n"); 5812 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
5828 ret = -EINVAL; 5813 ret = -EINVAL;
5829 goto err_uninit; 5814 goto err_uninit;
5830 } 5815 }
5831 5816
5832 ret = -EBUSY; 5817 ret = -EBUSY;
5833 if (!dev->ifindex) 5818 if (!dev->ifindex)
5834 dev->ifindex = dev_new_index(net); 5819 dev->ifindex = dev_new_index(net);
5835 else if (__dev_get_by_index(net, dev->ifindex)) 5820 else if (__dev_get_by_index(net, dev->ifindex))
5836 goto err_uninit; 5821 goto err_uninit;
5837 5822
5838 if (dev->iflink == -1) 5823 if (dev->iflink == -1)
5839 dev->iflink = dev->ifindex; 5824 dev->iflink = dev->ifindex;
5840 5825
5841 /* Transfer changeable features to wanted_features and enable 5826 /* Transfer changeable features to wanted_features and enable
5842 * software offloads (GSO and GRO). 5827 * software offloads (GSO and GRO).
5843 */ 5828 */
5844 dev->hw_features |= NETIF_F_SOFT_FEATURES; 5829 dev->hw_features |= NETIF_F_SOFT_FEATURES;
5845 dev->features |= NETIF_F_SOFT_FEATURES; 5830 dev->features |= NETIF_F_SOFT_FEATURES;
5846 dev->wanted_features = dev->features & dev->hw_features; 5831 dev->wanted_features = dev->features & dev->hw_features;
5847 5832
5848 /* Turn on no cache copy if HW is doing checksum */ 5833 /* Turn on no cache copy if HW is doing checksum */
5849 if (!(dev->flags & IFF_LOOPBACK)) { 5834 if (!(dev->flags & IFF_LOOPBACK)) {
5850 dev->hw_features |= NETIF_F_NOCACHE_COPY; 5835 dev->hw_features |= NETIF_F_NOCACHE_COPY;
5851 if (dev->features & NETIF_F_ALL_CSUM) { 5836 if (dev->features & NETIF_F_ALL_CSUM) {
5852 dev->wanted_features |= NETIF_F_NOCACHE_COPY; 5837 dev->wanted_features |= NETIF_F_NOCACHE_COPY;
5853 dev->features |= NETIF_F_NOCACHE_COPY; 5838 dev->features |= NETIF_F_NOCACHE_COPY;
5854 } 5839 }
5855 } 5840 }
5856 5841
5857 /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. 5842 /* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
5858 */ 5843 */
5859 dev->vlan_features |= NETIF_F_HIGHDMA; 5844 dev->vlan_features |= NETIF_F_HIGHDMA;
5860 5845
5861 /* Make NETIF_F_SG inheritable to tunnel devices. 5846 /* Make NETIF_F_SG inheritable to tunnel devices.
5862 */ 5847 */
5863 dev->hw_enc_features |= NETIF_F_SG; 5848 dev->hw_enc_features |= NETIF_F_SG;
5864 5849
5865 /* Make NETIF_F_SG inheritable to MPLS. 5850 /* Make NETIF_F_SG inheritable to MPLS.
5866 */ 5851 */
5867 dev->mpls_features |= NETIF_F_SG; 5852 dev->mpls_features |= NETIF_F_SG;
5868 5853
5869 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); 5854 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5870 ret = notifier_to_errno(ret); 5855 ret = notifier_to_errno(ret);
5871 if (ret) 5856 if (ret)
5872 goto err_uninit; 5857 goto err_uninit;
5873 5858
5874 ret = netdev_register_kobject(dev); 5859 ret = netdev_register_kobject(dev);
5875 if (ret) 5860 if (ret)
5876 goto err_uninit; 5861 goto err_uninit;
5877 dev->reg_state = NETREG_REGISTERED; 5862 dev->reg_state = NETREG_REGISTERED;
5878 5863
5879 __netdev_update_features(dev); 5864 __netdev_update_features(dev);
5880 5865
5881 /* 5866 /*
5882 * Default initial state at registry is that the 5867 * Default initial state at registry is that the
5883 * device is present. 5868 * device is present.
5884 */ 5869 */
5885 5870
5886 set_bit(__LINK_STATE_PRESENT, &dev->state); 5871 set_bit(__LINK_STATE_PRESENT, &dev->state);
5887 5872
5888 linkwatch_init_dev(dev); 5873 linkwatch_init_dev(dev);
5889 5874
5890 dev_init_scheduler(dev); 5875 dev_init_scheduler(dev);
5891 dev_hold(dev); 5876 dev_hold(dev);
5892 list_netdevice(dev); 5877 list_netdevice(dev);
5893 add_device_randomness(dev->dev_addr, dev->addr_len); 5878 add_device_randomness(dev->dev_addr, dev->addr_len);
5894 5879
5895 /* If the device has a permanent device address, the driver should 5880 /* If the device has a permanent device address, the driver should
5896 * set dev_addr and also addr_assign_type should be set to 5881 * set dev_addr and also addr_assign_type should be set to
5897 * NET_ADDR_PERM (default value). 5882 * NET_ADDR_PERM (default value).
5898 */ 5883 */
5899 if (dev->addr_assign_type == NET_ADDR_PERM) 5884 if (dev->addr_assign_type == NET_ADDR_PERM)
5900 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); 5885 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
5901 5886
5902 /* Notify protocols that a new device appeared. */ 5887 /* Notify protocols that a new device appeared. */
5903 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); 5888 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
5904 ret = notifier_to_errno(ret); 5889 ret = notifier_to_errno(ret);
5905 if (ret) { 5890 if (ret) {
5906 rollback_registered(dev); 5891 rollback_registered(dev);
5907 dev->reg_state = NETREG_UNREGISTERED; 5892 dev->reg_state = NETREG_UNREGISTERED;
5908 } 5893 }
5909 /* 5894 /*
5910 * Prevent userspace races by waiting until the network 5895 * Prevent userspace races by waiting until the network
5911 * device is fully setup before sending notifications. 5896 * device is fully setup before sending notifications.
5912 */ 5897 */
5913 if (!dev->rtnl_link_ops || 5898 if (!dev->rtnl_link_ops ||
5914 dev->rtnl_link_state == RTNL_LINK_INITIALIZED) 5899 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5915 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL); 5900 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
5916 5901
5917 out: 5902 out:
5918 return ret; 5903 return ret;
5919 5904
5920 err_uninit: 5905 err_uninit:
5921 if (dev->netdev_ops->ndo_uninit) 5906 if (dev->netdev_ops->ndo_uninit)
5922 dev->netdev_ops->ndo_uninit(dev); 5907 dev->netdev_ops->ndo_uninit(dev);
5923 goto out; 5908 goto out;
5924 } 5909 }
5925 EXPORT_SYMBOL(register_netdevice); 5910 EXPORT_SYMBOL(register_netdevice);
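/* Illustrative sketch, assuming a hypothetical caller my_create_dev():
 * register_netdevice() expects the caller to already hold RTNL, unlike the
 * register_netdev() wrapper further below.
 */
static int my_create_dev(struct net_device *dev)
{
	int err;

	ASSERT_RTNL();			/* caller must hold the rtnl semaphore */

	err = register_netdevice(dev);
	if (err)
		return err;		/* negative errno, dev stays unregistered */

	netif_carrier_off(dev);		/* typical follow-up while still under RTNL */
	return 0;
}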
5926 5911
5927 /** 5912 /**
5928 * init_dummy_netdev - init a dummy network device for NAPI 5913 * init_dummy_netdev - init a dummy network device for NAPI
5929 * @dev: device to init 5914 * @dev: device to init
5930 * 5915 *
5931 * This takes a network device structure and initializes the minimum 5916 * This takes a network device structure and initializes the minimum
5932 * number of fields so it can be used to schedule NAPI polls without 5917 * number of fields so it can be used to schedule NAPI polls without
5933 * registering a full-blown interface. This is to be used by drivers 5918 * registering a full-blown interface. This is to be used by drivers
5934 * that need to tie several hardware interfaces to a single NAPI 5919 * that need to tie several hardware interfaces to a single NAPI
5935 * poll scheduler due to HW limitations. 5920 * poll scheduler due to HW limitations.
5936 */ 5921 */
5937 int init_dummy_netdev(struct net_device *dev) 5922 int init_dummy_netdev(struct net_device *dev)
5938 { 5923 {
5939 /* Clear everything. Note we don't initialize spinlocks 5924 /* Clear everything. Note we don't initialize spinlocks
5940 * as they aren't supposed to be taken by any of the 5925 * as they aren't supposed to be taken by any of the
5941 * NAPI code and this dummy netdev is supposed to be 5926 * NAPI code and this dummy netdev is supposed to be
5942 * only ever used for NAPI polls 5927 * only ever used for NAPI polls
5943 */ 5928 */
5944 memset(dev, 0, sizeof(struct net_device)); 5929 memset(dev, 0, sizeof(struct net_device));
5945 5930
5946 /* make sure we BUG if trying to hit standard 5931 /* make sure we BUG if trying to hit standard
5947 * register/unregister code path 5932 * register/unregister code path
5948 */ 5933 */
5949 dev->reg_state = NETREG_DUMMY; 5934 dev->reg_state = NETREG_DUMMY;
5950 5935
5951 /* NAPI wants this */ 5936 /* NAPI wants this */
5952 INIT_LIST_HEAD(&dev->napi_list); 5937 INIT_LIST_HEAD(&dev->napi_list);
5953 5938
5954 /* a dummy interface is started by default */ 5939 /* a dummy interface is started by default */
5955 set_bit(__LINK_STATE_PRESENT, &dev->state); 5940 set_bit(__LINK_STATE_PRESENT, &dev->state);
5956 set_bit(__LINK_STATE_START, &dev->state); 5941 set_bit(__LINK_STATE_START, &dev->state);
5957 5942
5958 /* Note: We don't allocate pcpu_refcnt for dummy devices, 5943 /* Note: We don't allocate pcpu_refcnt for dummy devices,
5959 * because users of this 'device' don't need to change 5944 * because users of this 'device' don't need to change
5960 * its refcount. 5945 * its refcount.
5961 */ 5946 */
5962 5947
5963 return 0; 5948 return 0;
5964 } 5949 }
5965 EXPORT_SYMBOL_GPL(init_dummy_netdev); 5950 EXPORT_SYMBOL_GPL(init_dummy_netdev);
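/* Illustrative sketch of the use case described in the kernel-doc above:
 * a hypothetical struct my_adapter ties one NAPI context to a dummy,
 * never-registered netdev so several hardware queues can share a poller.
 */
struct my_adapter {
	struct net_device napi_dev;	/* dummy device, never registered */
	struct napi_struct napi;
};

static void my_adapter_setup_napi(struct my_adapter *ad,
				  int (*poll)(struct napi_struct *, int))
{
	init_dummy_netdev(&ad->napi_dev);
	netif_napi_add(&ad->napi_dev, &ad->napi, poll, 64);
	napi_enable(&ad->napi);
}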
5966 5951
5967 5952
5968 /** 5953 /**
5969 * register_netdev - register a network device 5954 * register_netdev - register a network device
5970 * @dev: device to register 5955 * @dev: device to register
5971 * 5956 *
5972 * Take a completed network device structure and add it to the kernel 5957 * Take a completed network device structure and add it to the kernel
5973 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier 5958 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5974 * chain. 0 is returned on success. A negative errno code is returned 5959 * chain. 0 is returned on success. A negative errno code is returned
5975 * on a failure to set up the device, or if the name is a duplicate. 5960 * on a failure to set up the device, or if the name is a duplicate.
5976 * 5961 *
5977 * This is a wrapper around register_netdevice that takes the rtnl semaphore 5962 * This is a wrapper around register_netdevice that takes the rtnl semaphore
5978 * and expands the device name if you passed a format string to 5963 * and expands the device name if you passed a format string to
5979 * alloc_netdev. 5964 * alloc_netdev.
5980 */ 5965 */
5981 int register_netdev(struct net_device *dev) 5966 int register_netdev(struct net_device *dev)
5982 { 5967 {
5983 int err; 5968 int err;
5984 5969
5985 rtnl_lock(); 5970 rtnl_lock();
5986 err = register_netdevice(dev); 5971 err = register_netdevice(dev);
5987 rtnl_unlock(); 5972 rtnl_unlock();
5988 return err; 5973 return err;
5989 } 5974 }
5990 EXPORT_SYMBOL(register_netdev); 5975 EXPORT_SYMBOL(register_netdev);
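/* Illustrative probe-time sketch (my_probe() and struct my_priv are
 * hypothetical): register_netdev() takes RTNL itself and expands a format
 * string such as "eth%d" left in dev->name by alloc_etherdev().
 */
struct my_priv {
	int dummy;
};

static int my_probe(struct device *parent)
{
	struct net_device *dev;
	int err;

	dev = alloc_etherdev(sizeof(struct my_priv));
	if (!dev)
		return -ENOMEM;

	SET_NETDEV_DEV(dev, parent);
	/* ... fill in dev->netdev_ops, dev->features, dev->dev_addr ... */

	err = register_netdev(dev);	/* takes and releases rtnl_lock() */
	if (err)
		free_netdev(dev);
	return err;
}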
5991 5976
5992 int netdev_refcnt_read(const struct net_device *dev) 5977 int netdev_refcnt_read(const struct net_device *dev)
5993 { 5978 {
5994 int i, refcnt = 0; 5979 int i, refcnt = 0;
5995 5980
5996 for_each_possible_cpu(i) 5981 for_each_possible_cpu(i)
5997 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i); 5982 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
5998 return refcnt; 5983 return refcnt;
5999 } 5984 }
6000 EXPORT_SYMBOL(netdev_refcnt_read); 5985 EXPORT_SYMBOL(netdev_refcnt_read);
6001 5986
6002 /** 5987 /**
6003 * netdev_wait_allrefs - wait until all references are gone. 5988 * netdev_wait_allrefs - wait until all references are gone.
6004 * @dev: target net_device 5989 * @dev: target net_device
6005 * 5990 *
6006 * This is called when unregistering network devices. 5991 * This is called when unregistering network devices.
6007 * 5992 *
6008 * Any protocol or device that holds a reference should register 5993 * Any protocol or device that holds a reference should register
6009 * for netdevice notification, and cleanup and put back the 5994 * for netdevice notification, and cleanup and put back the
6010 * reference if they receive an UNREGISTER event. 5995 * reference if they receive an UNREGISTER event.
6011 * We can get stuck here if buggy protocols don't correctly 5996 * We can get stuck here if buggy protocols don't correctly
6012 * call dev_put. 5997 * call dev_put.
6013 */ 5998 */
6014 static void netdev_wait_allrefs(struct net_device *dev) 5999 static void netdev_wait_allrefs(struct net_device *dev)
6015 { 6000 {
6016 unsigned long rebroadcast_time, warning_time; 6001 unsigned long rebroadcast_time, warning_time;
6017 int refcnt; 6002 int refcnt;
6018 6003
6019 linkwatch_forget_dev(dev); 6004 linkwatch_forget_dev(dev);
6020 6005
6021 rebroadcast_time = warning_time = jiffies; 6006 rebroadcast_time = warning_time = jiffies;
6022 refcnt = netdev_refcnt_read(dev); 6007 refcnt = netdev_refcnt_read(dev);
6023 6008
6024 while (refcnt != 0) { 6009 while (refcnt != 0) {
6025 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { 6010 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
6026 rtnl_lock(); 6011 rtnl_lock();
6027 6012
6028 /* Rebroadcast unregister notification */ 6013 /* Rebroadcast unregister notification */
6029 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 6014 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6030 6015
6031 __rtnl_unlock(); 6016 __rtnl_unlock();
6032 rcu_barrier(); 6017 rcu_barrier();
6033 rtnl_lock(); 6018 rtnl_lock();
6034 6019
6035 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); 6020 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
6036 if (test_bit(__LINK_STATE_LINKWATCH_PENDING, 6021 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
6037 &dev->state)) { 6022 &dev->state)) {
6038 /* We must not have linkwatch events 6023 /* We must not have linkwatch events
6039 * pending on unregister. If this 6024 * pending on unregister. If this
6040 * happens, we simply run the queue 6025 * happens, we simply run the queue
6041 * unscheduled, resulting in a noop 6026 * unscheduled, resulting in a noop
6042 * for this device. 6027 * for this device.
6043 */ 6028 */
6044 linkwatch_run_queue(); 6029 linkwatch_run_queue();
6045 } 6030 }
6046 6031
6047 __rtnl_unlock(); 6032 __rtnl_unlock();
6048 6033
6049 rebroadcast_time = jiffies; 6034 rebroadcast_time = jiffies;
6050 } 6035 }
6051 6036
6052 msleep(250); 6037 msleep(250);
6053 6038
6054 refcnt = netdev_refcnt_read(dev); 6039 refcnt = netdev_refcnt_read(dev);
6055 6040
6056 if (time_after(jiffies, warning_time + 10 * HZ)) { 6041 if (time_after(jiffies, warning_time + 10 * HZ)) {
6057 pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n", 6042 pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
6058 dev->name, refcnt); 6043 dev->name, refcnt);
6059 warning_time = jiffies; 6044 warning_time = jiffies;
6060 } 6045 }
6061 } 6046 }
6062 } 6047 }
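/* Illustrative sketch of the rule stated in the kernel-doc above: whoever
 * took a reference with dev_hold() must drop it on NETDEV_UNREGISTER, or
 * netdev_wait_allrefs() keeps printing the "waiting for %s" warning.
 * my_tracked_dev and my_netdev_event() are hypothetical.
 */
static struct net_device *my_tracked_dev;	/* held via an earlier dev_hold() */

static int my_netdev_event(struct notifier_block *nb,
			   unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	if (event == NETDEV_UNREGISTER && dev == my_tracked_dev) {
		my_tracked_dev = NULL;
		dev_put(dev);		/* balance the earlier dev_hold() */
	}
	return NOTIFY_DONE;
}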
6063 6048
6064 /* The sequence is: 6049 /* The sequence is:
6065 * 6050 *
6066 * rtnl_lock(); 6051 * rtnl_lock();
6067 * ... 6052 * ...
6068 * register_netdevice(x1); 6053 * register_netdevice(x1);
6069 * register_netdevice(x2); 6054 * register_netdevice(x2);
6070 * ... 6055 * ...
6071 * unregister_netdevice(y1); 6056 * unregister_netdevice(y1);
6072 * unregister_netdevice(y2); 6057 * unregister_netdevice(y2);
6073 * ... 6058 * ...
6074 * rtnl_unlock(); 6059 * rtnl_unlock();
6075 * free_netdev(y1); 6060 * free_netdev(y1);
6076 * free_netdev(y2); 6061 * free_netdev(y2);
6077 * 6062 *
6078 * We are invoked by rtnl_unlock(). 6063 * We are invoked by rtnl_unlock().
6079 * This allows us to deal with problems: 6064 * This allows us to deal with problems:
6080 * 1) We can delete sysfs objects which invoke hotplug 6065 * 1) We can delete sysfs objects which invoke hotplug
6081 * without deadlocking with linkwatch via keventd. 6066 * without deadlocking with linkwatch via keventd.
6082 * 2) Since we run with the RTNL semaphore not held, we can sleep 6067 * 2) Since we run with the RTNL semaphore not held, we can sleep
6083 * safely in order to wait for the netdev refcnt to drop to zero. 6068 * safely in order to wait for the netdev refcnt to drop to zero.
6084 * 6069 *
6085 * We must not return until all unregister events added during 6070 * We must not return until all unregister events added during
6086 * the interval the lock was held have been completed. 6071 * the interval the lock was held have been completed.
6087 */ 6072 */
6088 void netdev_run_todo(void) 6073 void netdev_run_todo(void)
6089 { 6074 {
6090 struct list_head list; 6075 struct list_head list;
6091 6076
6092 /* Snapshot list, allow later requests */ 6077 /* Snapshot list, allow later requests */
6093 list_replace_init(&net_todo_list, &list); 6078 list_replace_init(&net_todo_list, &list);
6094 6079
6095 __rtnl_unlock(); 6080 __rtnl_unlock();
6096 6081
6097 6082
6098 /* Wait for rcu callbacks to finish before next phase */ 6083 /* Wait for rcu callbacks to finish before next phase */
6099 if (!list_empty(&list)) 6084 if (!list_empty(&list))
6100 rcu_barrier(); 6085 rcu_barrier();
6101 6086
6102 while (!list_empty(&list)) { 6087 while (!list_empty(&list)) {
6103 struct net_device *dev 6088 struct net_device *dev
6104 = list_first_entry(&list, struct net_device, todo_list); 6089 = list_first_entry(&list, struct net_device, todo_list);
6105 list_del(&dev->todo_list); 6090 list_del(&dev->todo_list);
6106 6091
6107 rtnl_lock(); 6092 rtnl_lock();
6108 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); 6093 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
6109 __rtnl_unlock(); 6094 __rtnl_unlock();
6110 6095
6111 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { 6096 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
6112 pr_err("network todo '%s' but state %d\n", 6097 pr_err("network todo '%s' but state %d\n",
6113 dev->name, dev->reg_state); 6098 dev->name, dev->reg_state);
6114 dump_stack(); 6099 dump_stack();
6115 continue; 6100 continue;
6116 } 6101 }
6117 6102
6118 dev->reg_state = NETREG_UNREGISTERED; 6103 dev->reg_state = NETREG_UNREGISTERED;
6119 6104
6120 on_each_cpu(flush_backlog, dev, 1); 6105 on_each_cpu(flush_backlog, dev, 1);
6121 6106
6122 netdev_wait_allrefs(dev); 6107 netdev_wait_allrefs(dev);
6123 6108
6124 /* paranoia */ 6109 /* paranoia */
6125 BUG_ON(netdev_refcnt_read(dev)); 6110 BUG_ON(netdev_refcnt_read(dev));
6126 WARN_ON(rcu_access_pointer(dev->ip_ptr)); 6111 WARN_ON(rcu_access_pointer(dev->ip_ptr));
6127 WARN_ON(rcu_access_pointer(dev->ip6_ptr)); 6112 WARN_ON(rcu_access_pointer(dev->ip6_ptr));
6128 WARN_ON(dev->dn_ptr); 6113 WARN_ON(dev->dn_ptr);
6129 6114
6130 if (dev->destructor) 6115 if (dev->destructor)
6131 dev->destructor(dev); 6116 dev->destructor(dev);
6132 6117
6133 /* Report a network device has been unregistered */ 6118 /* Report a network device has been unregistered */
6134 rtnl_lock(); 6119 rtnl_lock();
6135 dev_net(dev)->dev_unreg_count--; 6120 dev_net(dev)->dev_unreg_count--;
6136 __rtnl_unlock(); 6121 __rtnl_unlock();
6137 wake_up(&netdev_unregistering_wq); 6122 wake_up(&netdev_unregistering_wq);
6138 6123
6139 /* Free network device */ 6124 /* Free network device */
6140 kobject_put(&dev->dev.kobj); 6125 kobject_put(&dev->dev.kobj);
6141 } 6126 }
6142 } 6127 }
6143 6128
6144 /* Convert net_device_stats to rtnl_link_stats64. They have the same 6129 /* Convert net_device_stats to rtnl_link_stats64. They have the same
6145 * fields in the same order, with only the type differing. 6130 * fields in the same order, with only the type differing.
6146 */ 6131 */
6147 void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, 6132 void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
6148 const struct net_device_stats *netdev_stats) 6133 const struct net_device_stats *netdev_stats)
6149 { 6134 {
6150 #if BITS_PER_LONG == 64 6135 #if BITS_PER_LONG == 64
6151 BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats)); 6136 BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
6152 memcpy(stats64, netdev_stats, sizeof(*stats64)); 6137 memcpy(stats64, netdev_stats, sizeof(*stats64));
6153 #else 6138 #else
6154 size_t i, n = sizeof(*stats64) / sizeof(u64); 6139 size_t i, n = sizeof(*stats64) / sizeof(u64);
6155 const unsigned long *src = (const unsigned long *)netdev_stats; 6140 const unsigned long *src = (const unsigned long *)netdev_stats;
6156 u64 *dst = (u64 *)stats64; 6141 u64 *dst = (u64 *)stats64;
6157 6142
6158 BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) != 6143 BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
6159 sizeof(*stats64) / sizeof(u64)); 6144 sizeof(*stats64) / sizeof(u64));
6160 for (i = 0; i < n; i++) 6145 for (i = 0; i < n; i++)
6161 dst[i] = src[i]; 6146 dst[i] = src[i];
6162 #endif 6147 #endif
6163 } 6148 }
6164 EXPORT_SYMBOL(netdev_stats_to_stats64); 6149 EXPORT_SYMBOL(netdev_stats_to_stats64);
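/* Illustrative sketch (hypothetical my_get_stats64()): a driver that keeps
 * its software counters in dev->stats can convert them in one call and then
 * layer its own 64-bit hardware counters on top.
 */
static struct rtnl_link_stats64 *my_get_stats64(struct net_device *dev,
						struct rtnl_link_stats64 *storage)
{
	netdev_stats_to_stats64(storage, &dev->stats);
	/* storage->rx_bytes += ...read 64-bit HW counters here (hypothetical)... */
	return storage;
}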
6165 6150
6166 /** 6151 /**
6167 * dev_get_stats - get network device statistics 6152 * dev_get_stats - get network device statistics
6168 * @dev: device to get statistics from 6153 * @dev: device to get statistics from
6169 * @storage: place to store stats 6154 * @storage: place to store stats
6170 * 6155 *
6171 * Get network statistics from device. Return @storage. 6156 * Get network statistics from device. Return @storage.
6172 * The device driver may provide its own method by setting 6157 * The device driver may provide its own method by setting
6173 * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats; 6158 * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
6174 * otherwise the internal statistics structure is used. 6159 * otherwise the internal statistics structure is used.
6175 */ 6160 */
6176 struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, 6161 struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
6177 struct rtnl_link_stats64 *storage) 6162 struct rtnl_link_stats64 *storage)
6178 { 6163 {
6179 const struct net_device_ops *ops = dev->netdev_ops; 6164 const struct net_device_ops *ops = dev->netdev_ops;
6180 6165
6181 if (ops->ndo_get_stats64) { 6166 if (ops->ndo_get_stats64) {
6182 memset(storage, 0, sizeof(*storage)); 6167 memset(storage, 0, sizeof(*storage));
6183 ops->ndo_get_stats64(dev, storage); 6168 ops->ndo_get_stats64(dev, storage);
6184 } else if (ops->ndo_get_stats) { 6169 } else if (ops->ndo_get_stats) {
6185 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); 6170 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
6186 } else { 6171 } else {
6187 netdev_stats_to_stats64(storage, &dev->stats); 6172 netdev_stats_to_stats64(storage, &dev->stats);
6188 } 6173 }
6189 storage->rx_dropped += atomic_long_read(&dev->rx_dropped); 6174 storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
6190 return storage; 6175 return storage;
6191 } 6176 }
6192 EXPORT_SYMBOL(dev_get_stats); 6177 EXPORT_SYMBOL(dev_get_stats);
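/* Illustrative sketch: how a reader of statistics (procfs-style dump code,
 * for example) would call dev_get_stats(); my_dump_stats() is hypothetical
 * and temp is only a local snapshot buffer.
 */
static void my_dump_stats(struct net_device *dev)
{
	struct rtnl_link_stats64 temp;
	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);

	pr_info("%s: rx %llu tx %llu rx_dropped %llu\n", dev->name,
		stats->rx_packets, stats->tx_packets, stats->rx_dropped);
}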
6193 6178
6194 struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) 6179 struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
6195 { 6180 {
6196 struct netdev_queue *queue = dev_ingress_queue(dev); 6181 struct netdev_queue *queue = dev_ingress_queue(dev);
6197 6182
6198 #ifdef CONFIG_NET_CLS_ACT 6183 #ifdef CONFIG_NET_CLS_ACT
6199 if (queue) 6184 if (queue)
6200 return queue; 6185 return queue;
6201 queue = kzalloc(sizeof(*queue), GFP_KERNEL); 6186 queue = kzalloc(sizeof(*queue), GFP_KERNEL);
6202 if (!queue) 6187 if (!queue)
6203 return NULL; 6188 return NULL;
6204 netdev_init_one_queue(dev, queue, NULL); 6189 netdev_init_one_queue(dev, queue, NULL);
6205 queue->qdisc = &noop_qdisc; 6190 queue->qdisc = &noop_qdisc;
6206 queue->qdisc_sleeping = &noop_qdisc; 6191 queue->qdisc_sleeping = &noop_qdisc;
6207 rcu_assign_pointer(dev->ingress_queue, queue); 6192 rcu_assign_pointer(dev->ingress_queue, queue);
6208 #endif 6193 #endif
6209 return queue; 6194 return queue;
6210 } 6195 }
6211 6196
6212 static const struct ethtool_ops default_ethtool_ops; 6197 static const struct ethtool_ops default_ethtool_ops;
6213 6198
6214 void netdev_set_default_ethtool_ops(struct net_device *dev, 6199 void netdev_set_default_ethtool_ops(struct net_device *dev,
6215 const struct ethtool_ops *ops) 6200 const struct ethtool_ops *ops)
6216 { 6201 {
6217 if (dev->ethtool_ops == &default_ethtool_ops) 6202 if (dev->ethtool_ops == &default_ethtool_ops)
6218 dev->ethtool_ops = ops; 6203 dev->ethtool_ops = ops;
6219 } 6204 }
6220 EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); 6205 EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
6221 6206
6222 void netdev_freemem(struct net_device *dev) 6207 void netdev_freemem(struct net_device *dev)
6223 { 6208 {
6224 char *addr = (char *)dev - dev->padded; 6209 char *addr = (char *)dev - dev->padded;
6225 6210
6226 if (is_vmalloc_addr(addr)) 6211 if (is_vmalloc_addr(addr))
6227 vfree(addr); 6212 vfree(addr);
6228 else 6213 else
6229 kfree(addr); 6214 kfree(addr);
6230 } 6215 }
6231 6216
6232 /** 6217 /**
6233 * alloc_netdev_mqs - allocate network device 6218 * alloc_netdev_mqs - allocate network device
6234 * @sizeof_priv: size of private data to allocate space for 6219 * @sizeof_priv: size of private data to allocate space for
6235 * @name: device name format string 6220 * @name: device name format string
6236 * @setup: callback to initialize device 6221 * @setup: callback to initialize device
6237 * @txqs: the number of TX subqueues to allocate 6222 * @txqs: the number of TX subqueues to allocate
6238 * @rxqs: the number of RX subqueues to allocate 6223 * @rxqs: the number of RX subqueues to allocate
6239 * 6224 *
6240 * Allocates a struct net_device with a private data area for driver use 6225 * Allocates a struct net_device with a private data area for driver use
6241 * and performs basic initialization. Also allocates subqueue structs 6226 * and performs basic initialization. Also allocates subqueue structs
6242 * for each queue on the device. 6227 * for each queue on the device.
6243 */ 6228 */
6244 struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, 6229 struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6245 void (*setup)(struct net_device *), 6230 void (*setup)(struct net_device *),
6246 unsigned int txqs, unsigned int rxqs) 6231 unsigned int txqs, unsigned int rxqs)
6247 { 6232 {
6248 struct net_device *dev; 6233 struct net_device *dev;
6249 size_t alloc_size; 6234 size_t alloc_size;
6250 struct net_device *p; 6235 struct net_device *p;
6251 6236
6252 BUG_ON(strlen(name) >= sizeof(dev->name)); 6237 BUG_ON(strlen(name) >= sizeof(dev->name));
6253 6238
6254 if (txqs < 1) { 6239 if (txqs < 1) {
6255 pr_err("alloc_netdev: Unable to allocate device with zero queues\n"); 6240 pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
6256 return NULL; 6241 return NULL;
6257 } 6242 }
6258 6243
6259 #ifdef CONFIG_RPS 6244 #ifdef CONFIG_RPS
6260 if (rxqs < 1) { 6245 if (rxqs < 1) {
6261 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); 6246 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
6262 return NULL; 6247 return NULL;
6263 } 6248 }
6264 #endif 6249 #endif
6265 6250
6266 alloc_size = sizeof(struct net_device); 6251 alloc_size = sizeof(struct net_device);
6267 if (sizeof_priv) { 6252 if (sizeof_priv) {
6268 /* ensure 32-byte alignment of private area */ 6253 /* ensure 32-byte alignment of private area */
6269 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN); 6254 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
6270 alloc_size += sizeof_priv; 6255 alloc_size += sizeof_priv;
6271 } 6256 }
6272 /* ensure 32-byte alignment of whole construct */ 6257 /* ensure 32-byte alignment of whole construct */
6273 alloc_size += NETDEV_ALIGN - 1; 6258 alloc_size += NETDEV_ALIGN - 1;
6274 6259
6275 p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); 6260 p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
6276 if (!p) 6261 if (!p)
6277 p = vzalloc(alloc_size); 6262 p = vzalloc(alloc_size);
6278 if (!p) 6263 if (!p)
6279 return NULL; 6264 return NULL;
6280 6265
6281 dev = PTR_ALIGN(p, NETDEV_ALIGN); 6266 dev = PTR_ALIGN(p, NETDEV_ALIGN);
6282 dev->padded = (char *)dev - (char *)p; 6267 dev->padded = (char *)dev - (char *)p;
6283 6268
6284 dev->pcpu_refcnt = alloc_percpu(int); 6269 dev->pcpu_refcnt = alloc_percpu(int);
6285 if (!dev->pcpu_refcnt) 6270 if (!dev->pcpu_refcnt)
6286 goto free_dev; 6271 goto free_dev;
6287 6272
6288 if (dev_addr_init(dev)) 6273 if (dev_addr_init(dev))
6289 goto free_pcpu; 6274 goto free_pcpu;
6290 6275
6291 dev_mc_init(dev); 6276 dev_mc_init(dev);
6292 dev_uc_init(dev); 6277 dev_uc_init(dev);
6293 6278
6294 dev_net_set(dev, &init_net); 6279 dev_net_set(dev, &init_net);
6295 6280
6296 dev->gso_max_size = GSO_MAX_SIZE; 6281 dev->gso_max_size = GSO_MAX_SIZE;
6297 dev->gso_max_segs = GSO_MAX_SEGS; 6282 dev->gso_max_segs = GSO_MAX_SEGS;
6298 6283
6299 INIT_LIST_HEAD(&dev->napi_list); 6284 INIT_LIST_HEAD(&dev->napi_list);
6300 INIT_LIST_HEAD(&dev->unreg_list); 6285 INIT_LIST_HEAD(&dev->unreg_list);
6301 INIT_LIST_HEAD(&dev->close_list); 6286 INIT_LIST_HEAD(&dev->close_list);
6302 INIT_LIST_HEAD(&dev->link_watch_list); 6287 INIT_LIST_HEAD(&dev->link_watch_list);
6303 INIT_LIST_HEAD(&dev->adj_list.upper); 6288 INIT_LIST_HEAD(&dev->adj_list.upper);
6304 INIT_LIST_HEAD(&dev->adj_list.lower); 6289 INIT_LIST_HEAD(&dev->adj_list.lower);
6305 INIT_LIST_HEAD(&dev->all_adj_list.upper); 6290 INIT_LIST_HEAD(&dev->all_adj_list.upper);
6306 INIT_LIST_HEAD(&dev->all_adj_list.lower); 6291 INIT_LIST_HEAD(&dev->all_adj_list.lower);
6307 dev->priv_flags = IFF_XMIT_DST_RELEASE; 6292 dev->priv_flags = IFF_XMIT_DST_RELEASE;
6308 setup(dev); 6293 setup(dev);
6309 6294
6310 dev->num_tx_queues = txqs; 6295 dev->num_tx_queues = txqs;
6311 dev->real_num_tx_queues = txqs; 6296 dev->real_num_tx_queues = txqs;
6312 if (netif_alloc_netdev_queues(dev)) 6297 if (netif_alloc_netdev_queues(dev))
6313 goto free_all; 6298 goto free_all;
6314 6299
6315 #ifdef CONFIG_RPS 6300 #ifdef CONFIG_RPS
6316 dev->num_rx_queues = rxqs; 6301 dev->num_rx_queues = rxqs;
6317 dev->real_num_rx_queues = rxqs; 6302 dev->real_num_rx_queues = rxqs;
6318 if (netif_alloc_rx_queues(dev)) 6303 if (netif_alloc_rx_queues(dev))
6319 goto free_all; 6304 goto free_all;
6320 #endif 6305 #endif
6321 6306
6322 strcpy(dev->name, name); 6307 strcpy(dev->name, name);
6323 dev->group = INIT_NETDEV_GROUP; 6308 dev->group = INIT_NETDEV_GROUP;
6324 if (!dev->ethtool_ops) 6309 if (!dev->ethtool_ops)
6325 dev->ethtool_ops = &default_ethtool_ops; 6310 dev->ethtool_ops = &default_ethtool_ops;
6326 return dev; 6311 return dev;
6327 6312
6328 free_all: 6313 free_all:
6329 free_netdev(dev); 6314 free_netdev(dev);
6330 return NULL; 6315 return NULL;
6331 6316
6332 free_pcpu: 6317 free_pcpu:
6333 free_percpu(dev->pcpu_refcnt); 6318 free_percpu(dev->pcpu_refcnt);
6334 netif_free_tx_queues(dev); 6319 netif_free_tx_queues(dev);
6335 #ifdef CONFIG_RPS 6320 #ifdef CONFIG_RPS
6336 kfree(dev->_rx); 6321 kfree(dev->_rx);
6337 #endif 6322 #endif
6338 6323
6339 free_dev: 6324 free_dev:
6340 netdev_freemem(dev); 6325 netdev_freemem(dev);
6341 return NULL; 6326 return NULL;
6342 } 6327 }
6343 EXPORT_SYMBOL(alloc_netdev_mqs); 6328 EXPORT_SYMBOL(alloc_netdev_mqs);
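/* Illustrative sketch: allocating an Ethernet-style device with four TX and
 * four RX queues. my_setup(), my_alloc() and struct my_mq_priv are
 * hypothetical; most drivers reach this function through wrappers such as
 * alloc_etherdev_mqs().
 */
struct my_mq_priv {
	int dummy;
};

static void my_setup(struct net_device *dev)
{
	ether_setup(dev);
	/* ... driver defaults: dev->netdev_ops, dev->features, ... */
}

static struct net_device *my_alloc(void)
{
	return alloc_netdev_mqs(sizeof(struct my_mq_priv), "myeth%d",
				my_setup, 4, 4);
}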
6344 6329
6345 /** 6330 /**
6346 * free_netdev - free network device 6331 * free_netdev - free network device
6347 * @dev: device 6332 * @dev: device
6348 * 6333 *
6349 * This function does the last stage of destroying an allocated device 6334 * This function does the last stage of destroying an allocated device
6350 * interface. The reference to the device object is released. 6335 * interface. The reference to the device object is released.
6351 * If this is the last reference then it will be freed. 6336 * If this is the last reference then it will be freed.
6352 */ 6337 */
6353 void free_netdev(struct net_device *dev) 6338 void free_netdev(struct net_device *dev)
6354 { 6339 {
6355 struct napi_struct *p, *n; 6340 struct napi_struct *p, *n;
6356 6341
6357 release_net(dev_net(dev)); 6342 release_net(dev_net(dev));
6358 6343
6359 netif_free_tx_queues(dev); 6344 netif_free_tx_queues(dev);
6360 #ifdef CONFIG_RPS 6345 #ifdef CONFIG_RPS
6361 kfree(dev->_rx); 6346 kfree(dev->_rx);
6362 #endif 6347 #endif
6363 6348
6364 kfree(rcu_dereference_protected(dev->ingress_queue, 1)); 6349 kfree(rcu_dereference_protected(dev->ingress_queue, 1));
6365 6350
6366 /* Flush device addresses */ 6351 /* Flush device addresses */
6367 dev_addr_flush(dev); 6352 dev_addr_flush(dev);
6368 6353
6369 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) 6354 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
6370 netif_napi_del(p); 6355 netif_napi_del(p);
6371 6356
6372 free_percpu(dev->pcpu_refcnt); 6357 free_percpu(dev->pcpu_refcnt);
6373 dev->pcpu_refcnt = NULL; 6358 dev->pcpu_refcnt = NULL;
6374 6359
6375 /* Compatibility with error handling in drivers */ 6360 /* Compatibility with error handling in drivers */
6376 if (dev->reg_state == NETREG_UNINITIALIZED) { 6361 if (dev->reg_state == NETREG_UNINITIALIZED) {
6377 netdev_freemem(dev); 6362 netdev_freemem(dev);
6378 return; 6363 return;
6379 } 6364 }
6380 6365
6381 BUG_ON(dev->reg_state != NETREG_UNREGISTERED); 6366 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
6382 dev->reg_state = NETREG_RELEASED; 6367 dev->reg_state = NETREG_RELEASED;
6383 6368
6384 /* will free via device release */ 6369 /* will free via device release */
6385 put_device(&dev->dev); 6370 put_device(&dev->dev);
6386 } 6371 }
6387 EXPORT_SYMBOL(free_netdev); 6372 EXPORT_SYMBOL(free_netdev);
6388 6373
6389 /** 6374 /**
6390 * synchronize_net - Synchronize with packet receive processing 6375 * synchronize_net - Synchronize with packet receive processing
6391 * 6376 *
6392 * Wait for packets currently being received to be done. 6377 * Wait for packets currently being received to be done.
6393 * Does not block later packets from starting. 6378 * Does not block later packets from starting.
6394 */ 6379 */
6395 void synchronize_net(void) 6380 void synchronize_net(void)
6396 { 6381 {
6397 might_sleep(); 6382 might_sleep();
6398 if (rtnl_is_locked()) 6383 if (rtnl_is_locked())
6399 synchronize_rcu_expedited(); 6384 synchronize_rcu_expedited();
6400 else 6385 else
6401 synchronize_rcu(); 6386 synchronize_rcu();
6402 } 6387 }
6403 EXPORT_SYMBOL(synchronize_net); 6388 EXPORT_SYMBOL(synchronize_net);
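/* Illustrative sketch of the usual pattern around synchronize_net():
 * unpublish an RCU-protected pointer, wait for in-flight receive paths to
 * finish, then free. struct my_cfg, my_cfg_ptr and my_remove_cfg() are
 * hypothetical.
 */
struct my_cfg {
	int id;
};

static struct my_cfg __rcu *my_cfg_ptr;

static void my_remove_cfg(void)
{
	struct my_cfg *old = rcu_dereference_protected(my_cfg_ptr, 1);

	RCU_INIT_POINTER(my_cfg_ptr, NULL);
	synchronize_net();		/* packet-processing RCU readers are done */
	kfree(old);
}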
6404 6389
6405 /** 6390 /**
6406 * unregister_netdevice_queue - remove device from the kernel 6391 * unregister_netdevice_queue - remove device from the kernel
6407 * @dev: device 6392 * @dev: device
6408 * @head: list 6393 * @head: list
6409 * 6394 *
6410 * This function shuts down a device interface and removes it 6395 * This function shuts down a device interface and removes it
6411 * from the kernel tables. 6396 * from the kernel tables.
6412 * If head is not NULL, the device is queued to be unregistered later. 6397 * If head is not NULL, the device is queued to be unregistered later.
6413 * 6398 *
6414 * Callers must hold the rtnl semaphore. You may want 6399 * Callers must hold the rtnl semaphore. You may want
6415 * unregister_netdev() instead of this. 6400 * unregister_netdev() instead of this.
6416 */ 6401 */
6417 6402
6418 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) 6403 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
6419 { 6404 {
6420 ASSERT_RTNL(); 6405 ASSERT_RTNL();
6421 6406
6422 if (head) { 6407 if (head) {
6423 list_move_tail(&dev->unreg_list, head); 6408 list_move_tail(&dev->unreg_list, head);
6424 } else { 6409 } else {
6425 rollback_registered(dev); 6410 rollback_registered(dev);
6426 /* Finish processing unregister after unlock */ 6411 /* Finish processing unregister after unlock */
6427 net_set_todo(dev); 6412 net_set_todo(dev);
6428 } 6413 }
6429 } 6414 }
6430 EXPORT_SYMBOL(unregister_netdevice_queue); 6415 EXPORT_SYMBOL(unregister_netdevice_queue);
6431 6416
6432 /** 6417 /**
6433 * unregister_netdevice_many - unregister many devices 6418 * unregister_netdevice_many - unregister many devices
6434 * @head: list of devices 6419 * @head: list of devices
6435 */ 6420 */
6436 void unregister_netdevice_many(struct list_head *head) 6421 void unregister_netdevice_many(struct list_head *head)
6437 { 6422 {
6438 struct net_device *dev; 6423 struct net_device *dev;
6439 6424
6440 if (!list_empty(head)) { 6425 if (!list_empty(head)) {
6441 rollback_registered_many(head); 6426 rollback_registered_many(head);
6442 list_for_each_entry(dev, head, unreg_list) 6427 list_for_each_entry(dev, head, unreg_list)
6443 net_set_todo(dev); 6428 net_set_todo(dev);
6444 } 6429 }
6445 } 6430 }
6446 EXPORT_SYMBOL(unregister_netdevice_many); 6431 EXPORT_SYMBOL(unregister_netdevice_many);
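/* Illustrative sketch: tearing several devices down in one RTNL/RCU cycle by
 * queueing each with unregister_netdevice_queue() and flushing the list with
 * unregister_netdevice_many(), the batching these two helpers provide.
 * my_destroy_all() is hypothetical.
 */
static void my_destroy_all(struct net_device *devs[], int n)
{
	LIST_HEAD(kill_list);
	int i;

	rtnl_lock();
	for (i = 0; i < n; i++)
		unregister_netdevice_queue(devs[i], &kill_list);
	unregister_netdevice_many(&kill_list);
	rtnl_unlock();
}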
6447 6432
6448 /** 6433 /**
6449 * unregister_netdev - remove device from the kernel 6434 * unregister_netdev - remove device from the kernel
6450 * @dev: device 6435 * @dev: device
6451 * 6436 *
6452 * This function shuts down a device interface and removes it 6437 * This function shuts down a device interface and removes it
6453 * from the kernel tables. 6438 * from the kernel tables.
6454 * 6439 *
6455 * This is just a wrapper for unregister_netdevice that takes 6440 * This is just a wrapper for unregister_netdevice that takes
6456 * the rtnl semaphore. In general you want to use this and not 6441 * the rtnl semaphore. In general you want to use this and not
6457 * unregister_netdevice. 6442 * unregister_netdevice.
6458 */ 6443 */
6459 void unregister_netdev(struct net_device *dev) 6444 void unregister_netdev(struct net_device *dev)
6460 { 6445 {
6461 rtnl_lock(); 6446 rtnl_lock();
6462 unregister_netdevice(dev); 6447 unregister_netdevice(dev);
6463 rtnl_unlock(); 6448 rtnl_unlock();
6464 } 6449 }
6465 EXPORT_SYMBOL(unregister_netdev); 6450 EXPORT_SYMBOL(unregister_netdev);
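/* Illustrative teardown sketch (hypothetical my_remove()): unregister first,
 * then free_netdev() once nothing can reach the device any more.
 */
static void my_remove(struct net_device *dev)
{
	unregister_netdev(dev);		/* takes and releases RTNL itself */
	free_netdev(dev);
}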
6466 6451
6467 /** 6452 /**
6468 * dev_change_net_namespace - move device to a different network namespace 6453 * dev_change_net_namespace - move device to a different network namespace
6469 * @dev: device 6454 * @dev: device
6470 * @net: network namespace 6455 * @net: network namespace
6471 * @pat: If not NULL, name pattern to try if the current device name 6456 * @pat: If not NULL, name pattern to try if the current device name
6472 * is already taken in the destination network namespace. 6457 * is already taken in the destination network namespace.
6473 * 6458 *
6474 * This function shuts down a device interface and moves it 6459 * This function shuts down a device interface and moves it
6475 * to a new network namespace. On success 0 is returned, on 6460 * to a new network namespace. On success 0 is returned, on
6476 * a failure a negative errno code is returned. 6461 * a failure a negative errno code is returned.
6477 * 6462 *
6478 * Callers must hold the rtnl semaphore. 6463 * Callers must hold the rtnl semaphore.
6479 */ 6464 */
6480 6465
6481 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) 6466 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
6482 { 6467 {
6483 int err; 6468 int err;
6484 6469
6485 ASSERT_RTNL(); 6470 ASSERT_RTNL();
6486 6471
6487 /* Don't allow namespace local devices to be moved. */ 6472 /* Don't allow namespace local devices to be moved. */
6488 err = -EINVAL; 6473 err = -EINVAL;
6489 if (dev->features & NETIF_F_NETNS_LOCAL) 6474 if (dev->features & NETIF_F_NETNS_LOCAL)
6490 goto out; 6475 goto out;
6491 6476
6492 /* Ensure the device has been registered */ 6477 /* Ensure the device has been registered */
6493 if (dev->reg_state != NETREG_REGISTERED) 6478 if (dev->reg_state != NETREG_REGISTERED)
6494 goto out; 6479 goto out;
6495 6480
6496 /* Get out if there is nothing to do */ 6481 /* Get out if there is nothing to do */
6497 err = 0; 6482 err = 0;
6498 if (net_eq(dev_net(dev), net)) 6483 if (net_eq(dev_net(dev), net))
6499 goto out; 6484 goto out;
6500 6485
6501 /* Pick the destination device name, and ensure 6486 /* Pick the destination device name, and ensure
6502 * we can use it in the destination network namespace. 6487 * we can use it in the destination network namespace.
6503 */ 6488 */
6504 err = -EEXIST; 6489 err = -EEXIST;
6505 if (__dev_get_by_name(net, dev->name)) { 6490 if (__dev_get_by_name(net, dev->name)) {
6506 /* We get here if we can't use the current device name */ 6491 /* We get here if we can't use the current device name */
6507 if (!pat) 6492 if (!pat)
6508 goto out; 6493 goto out;
6509 if (dev_get_valid_name(net, dev, pat) < 0) 6494 if (dev_get_valid_name(net, dev, pat) < 0)
6510 goto out; 6495 goto out;
6511 } 6496 }
6512 6497
6513 /* 6498 /*
6514 * And now a mini version of register_netdevice and unregister_netdevice. 6499 * And now a mini version of register_netdevice and unregister_netdevice.
6515 */ 6500 */
6516 6501
6517 /* If device is running close it first. */ 6502 /* If device is running close it first. */
6518 dev_close(dev); 6503 dev_close(dev);
6519 6504
6520 /* And unlink it from device chain */ 6505 /* And unlink it from device chain */
6521 err = -ENODEV; 6506 err = -ENODEV;
6522 unlist_netdevice(dev); 6507 unlist_netdevice(dev);
6523 6508
6524 synchronize_net(); 6509 synchronize_net();
6525 6510
6526 /* Shutdown queueing discipline. */ 6511 /* Shutdown queueing discipline. */
6527 dev_shutdown(dev); 6512 dev_shutdown(dev);
6528 6513
6529 /* Notify protocols that we are about to destroy 6514 /* Notify protocols that we are about to destroy
6530 this device. They should clean all the things. 6515 this device. They should clean all the things.
6531 6516
6532 Note that dev->reg_state stays at NETREG_REGISTERED. 6517 Note that dev->reg_state stays at NETREG_REGISTERED.
6533 This is wanted because this way 8021q and macvlan know 6518 This is wanted because this way 8021q and macvlan know
6534 the device is just moving and can keep their slaves up. 6519 the device is just moving and can keep their slaves up.
6535 */ 6520 */
6536 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 6521 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6537 rcu_barrier(); 6522 rcu_barrier();
6538 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); 6523 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
6539 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); 6524 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
6540 6525
6541 /* 6526 /*
6542 * Flush the unicast and multicast chains 6527 * Flush the unicast and multicast chains
6543 */ 6528 */
6544 dev_uc_flush(dev); 6529 dev_uc_flush(dev);
6545 dev_mc_flush(dev); 6530 dev_mc_flush(dev);
6546 6531
6547 /* Send a netdev-removed uevent to the old namespace */ 6532 /* Send a netdev-removed uevent to the old namespace */
6548 kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE); 6533 kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
6549 6534
6550 /* Actually switch the network namespace */ 6535 /* Actually switch the network namespace */
6551 dev_net_set(dev, net); 6536 dev_net_set(dev, net);
6552 6537
6553 /* If there is an ifindex conflict assign a new one */ 6538 /* If there is an ifindex conflict assign a new one */
6554 if (__dev_get_by_index(net, dev->ifindex)) { 6539 if (__dev_get_by_index(net, dev->ifindex)) {
6555 int iflink = (dev->iflink == dev->ifindex); 6540 int iflink = (dev->iflink == dev->ifindex);
6556 dev->ifindex = dev_new_index(net); 6541 dev->ifindex = dev_new_index(net);
6557 if (iflink) 6542 if (iflink)
6558 dev->iflink = dev->ifindex; 6543 dev->iflink = dev->ifindex;
6559 } 6544 }
6560 6545
6561 /* Send a netdev-add uevent to the new namespace */ 6546 /* Send a netdev-add uevent to the new namespace */
6562 kobject_uevent(&dev->dev.kobj, KOBJ_ADD); 6547 kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
6563 6548
6564 /* Fixup kobjects */ 6549 /* Fixup kobjects */
6565 err = device_rename(&dev->dev, dev->name); 6550 err = device_rename(&dev->dev, dev->name);
6566 WARN_ON(err); 6551 WARN_ON(err);
6567 6552
6568 /* Add the device back in the hashes */ 6553 /* Add the device back in the hashes */
6569 list_netdevice(dev); 6554 list_netdevice(dev);
6570 6555
6571 /* Notify protocols that a new device appeared. */ 6556 /* Notify protocols that a new device appeared. */
6572 call_netdevice_notifiers(NETDEV_REGISTER, dev); 6557 call_netdevice_notifiers(NETDEV_REGISTER, dev);
6573 6558
6574 /* 6559 /*
6575 * Prevent userspace races by waiting until the network 6560 * Prevent userspace races by waiting until the network
6576 * device is fully setup before sending notifications. 6561 * device is fully setup before sending notifications.
6577 */ 6562 */
6578 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL); 6563 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
6579 6564
6580 synchronize_net(); 6565 synchronize_net();
6581 err = 0; 6566 err = 0;
6582 out: 6567 out:
6583 return err; 6568 return err;
6584 } 6569 }
6585 EXPORT_SYMBOL_GPL(dev_change_net_namespace); 6570 EXPORT_SYMBOL_GPL(dev_change_net_namespace);
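/* Illustrative sketch (hypothetical my_move_dev()): moving a device into
 * another namespace with "dev%d" as the fallback name pattern, the same
 * pattern default_device_exit() below uses when pushing devices back to
 * init_net.
 */
static int my_move_dev(struct net_device *dev, struct net *target)
{
	int err;

	rtnl_lock();
	err = dev_change_net_namespace(dev, target, "dev%d");
	rtnl_unlock();
	return err;
}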
6586 6571
6587 static int dev_cpu_callback(struct notifier_block *nfb, 6572 static int dev_cpu_callback(struct notifier_block *nfb,
6588 unsigned long action, 6573 unsigned long action,
6589 void *ocpu) 6574 void *ocpu)
6590 { 6575 {
6591 struct sk_buff **list_skb; 6576 struct sk_buff **list_skb;
6592 struct sk_buff *skb; 6577 struct sk_buff *skb;
6593 unsigned int cpu, oldcpu = (unsigned long)ocpu; 6578 unsigned int cpu, oldcpu = (unsigned long)ocpu;
6594 struct softnet_data *sd, *oldsd; 6579 struct softnet_data *sd, *oldsd;
6595 6580
6596 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) 6581 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
6597 return NOTIFY_OK; 6582 return NOTIFY_OK;
6598 6583
6599 local_irq_disable(); 6584 local_irq_disable();
6600 cpu = smp_processor_id(); 6585 cpu = smp_processor_id();
6601 sd = &per_cpu(softnet_data, cpu); 6586 sd = &per_cpu(softnet_data, cpu);
6602 oldsd = &per_cpu(softnet_data, oldcpu); 6587 oldsd = &per_cpu(softnet_data, oldcpu);
6603 6588
6604 /* Find end of our completion_queue. */ 6589 /* Find end of our completion_queue. */
6605 list_skb = &sd->completion_queue; 6590 list_skb = &sd->completion_queue;
6606 while (*list_skb) 6591 while (*list_skb)
6607 list_skb = &(*list_skb)->next; 6592 list_skb = &(*list_skb)->next;
6608 /* Append completion queue from offline CPU. */ 6593 /* Append completion queue from offline CPU. */
6609 *list_skb = oldsd->completion_queue; 6594 *list_skb = oldsd->completion_queue;
6610 oldsd->completion_queue = NULL; 6595 oldsd->completion_queue = NULL;
6611 6596
6612 /* Append output queue from offline CPU. */ 6597 /* Append output queue from offline CPU. */
6613 if (oldsd->output_queue) { 6598 if (oldsd->output_queue) {
6614 *sd->output_queue_tailp = oldsd->output_queue; 6599 *sd->output_queue_tailp = oldsd->output_queue;
6615 sd->output_queue_tailp = oldsd->output_queue_tailp; 6600 sd->output_queue_tailp = oldsd->output_queue_tailp;
6616 oldsd->output_queue = NULL; 6601 oldsd->output_queue = NULL;
6617 oldsd->output_queue_tailp = &oldsd->output_queue; 6602 oldsd->output_queue_tailp = &oldsd->output_queue;
6618 } 6603 }
6619 /* Append NAPI poll list from offline CPU. */ 6604 /* Append NAPI poll list from offline CPU. */
6620 if (!list_empty(&oldsd->poll_list)) { 6605 if (!list_empty(&oldsd->poll_list)) {
6621 list_splice_init(&oldsd->poll_list, &sd->poll_list); 6606 list_splice_init(&oldsd->poll_list, &sd->poll_list);
6622 raise_softirq_irqoff(NET_RX_SOFTIRQ); 6607 raise_softirq_irqoff(NET_RX_SOFTIRQ);
6623 } 6608 }
6624 6609
6625 raise_softirq_irqoff(NET_TX_SOFTIRQ); 6610 raise_softirq_irqoff(NET_TX_SOFTIRQ);
6626 local_irq_enable(); 6611 local_irq_enable();
6627 6612
6628 /* Process offline CPU's input_pkt_queue */ 6613 /* Process offline CPU's input_pkt_queue */
6629 while ((skb = __skb_dequeue(&oldsd->process_queue))) { 6614 while ((skb = __skb_dequeue(&oldsd->process_queue))) {
6630 netif_rx(skb); 6615 netif_rx(skb);
6631 input_queue_head_incr(oldsd); 6616 input_queue_head_incr(oldsd);
6632 } 6617 }
6633 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { 6618 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
6634 netif_rx(skb); 6619 netif_rx(skb);
6635 input_queue_head_incr(oldsd); 6620 input_queue_head_incr(oldsd);
6636 } 6621 }
6637 6622
6638 return NOTIFY_OK; 6623 return NOTIFY_OK;
6639 } 6624 }
6640 6625
6641 6626
6642 /** 6627 /**
6643 * netdev_increment_features - increment feature set by one 6628 * netdev_increment_features - increment feature set by one
6644 * @all: current feature set 6629 * @all: current feature set
6645 * @one: new feature set 6630 * @one: new feature set
6646 * @mask: mask feature set 6631 * @mask: mask feature set
6647 * 6632 *
6648 * Computes a new feature set after adding a device with feature set 6633 * Computes a new feature set after adding a device with feature set
6649 * @one to the master device with current feature set @all. Will not 6634 * @one to the master device with current feature set @all. Will not
6650 * enable anything that is off in @mask. Returns the new feature set. 6635 * enable anything that is off in @mask. Returns the new feature set.
6651 */ 6636 */
6652 netdev_features_t netdev_increment_features(netdev_features_t all, 6637 netdev_features_t netdev_increment_features(netdev_features_t all,
6653 netdev_features_t one, netdev_features_t mask) 6638 netdev_features_t one, netdev_features_t mask)
6654 { 6639 {
6655 if (mask & NETIF_F_GEN_CSUM) 6640 if (mask & NETIF_F_GEN_CSUM)
6656 mask |= NETIF_F_ALL_CSUM; 6641 mask |= NETIF_F_ALL_CSUM;
6657 mask |= NETIF_F_VLAN_CHALLENGED; 6642 mask |= NETIF_F_VLAN_CHALLENGED;
6658 6643
6659 all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; 6644 all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
6660 all &= one | ~NETIF_F_ALL_FOR_ALL; 6645 all &= one | ~NETIF_F_ALL_FOR_ALL;
6661 6646
6662 /* If one device supports hw checksumming, set for all. */ 6647 /* If one device supports hw checksumming, set for all. */
6663 if (all & NETIF_F_GEN_CSUM) 6648 if (all & NETIF_F_GEN_CSUM)
6664 all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); 6649 all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
6665 6650
6666 return all; 6651 return all;
6667 } 6652 }
6668 EXPORT_SYMBOL(netdev_increment_features); 6653 EXPORT_SYMBOL(netdev_increment_features);
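/* Illustrative sketch of the aggregation loop the kernel-doc above has in
 * mind (bonding/team-like masters): fold each slave's feature set into the
 * master's. struct my_slave, MY_MASTER_FEATURES and my_compute_features()
 * are hypothetical.
 */
#define MY_MASTER_FEATURES (NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_ALL_CSUM)

struct my_slave {
	struct list_head list;
	struct net_device *dev;
};

static netdev_features_t my_compute_features(struct list_head *slave_list)
{
	netdev_features_t features = MY_MASTER_FEATURES & NETIF_F_ALL_FOR_ALL;
	struct my_slave *s;

	list_for_each_entry(s, slave_list, list)
		features = netdev_increment_features(features,
						     s->dev->features,
						     MY_MASTER_FEATURES);
	return features;
}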
6669 6654
6670 static struct hlist_head * __net_init netdev_create_hash(void) 6655 static struct hlist_head * __net_init netdev_create_hash(void)
6671 { 6656 {
6672 int i; 6657 int i;
6673 struct hlist_head *hash; 6658 struct hlist_head *hash;
6674 6659
6675 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL); 6660 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
6676 if (hash != NULL) 6661 if (hash != NULL)
6677 for (i = 0; i < NETDEV_HASHENTRIES; i++) 6662 for (i = 0; i < NETDEV_HASHENTRIES; i++)
6678 INIT_HLIST_HEAD(&hash[i]); 6663 INIT_HLIST_HEAD(&hash[i]);
6679 6664
6680 return hash; 6665 return hash;
6681 } 6666 }
6682 6667
6683 /* Initialize per network namespace state */ 6668 /* Initialize per network namespace state */
6684 static int __net_init netdev_init(struct net *net) 6669 static int __net_init netdev_init(struct net *net)
6685 { 6670 {
6686 if (net != &init_net) 6671 if (net != &init_net)
6687 INIT_LIST_HEAD(&net->dev_base_head); 6672 INIT_LIST_HEAD(&net->dev_base_head);
6688 6673
6689 net->dev_name_head = netdev_create_hash(); 6674 net->dev_name_head = netdev_create_hash();
6690 if (net->dev_name_head == NULL) 6675 if (net->dev_name_head == NULL)
6691 goto err_name; 6676 goto err_name;
6692 6677
6693 net->dev_index_head = netdev_create_hash(); 6678 net->dev_index_head = netdev_create_hash();
6694 if (net->dev_index_head == NULL) 6679 if (net->dev_index_head == NULL)
6695 goto err_idx; 6680 goto err_idx;
6696 6681
6697 return 0; 6682 return 0;
6698 6683
6699 err_idx: 6684 err_idx:
6700 kfree(net->dev_name_head); 6685 kfree(net->dev_name_head);
6701 err_name: 6686 err_name:
6702 return -ENOMEM; 6687 return -ENOMEM;
6703 } 6688 }
6704 6689
6705 /** 6690 /**
6706 * netdev_drivername - network driver for the device 6691 * netdev_drivername - network driver for the device
6707 * @dev: network device 6692 * @dev: network device
6708 * 6693 *
6709 * Determine network driver for device. 6694 * Determine network driver for device.
6710 */ 6695 */
6711 const char *netdev_drivername(const struct net_device *dev) 6696 const char *netdev_drivername(const struct net_device *dev)
6712 { 6697 {
6713 const struct device_driver *driver; 6698 const struct device_driver *driver;
6714 const struct device *parent; 6699 const struct device *parent;
6715 const char *empty = ""; 6700 const char *empty = "";
6716 6701
6717 parent = dev->dev.parent; 6702 parent = dev->dev.parent;
6718 if (!parent) 6703 if (!parent)
6719 return empty; 6704 return empty;
6720 6705
6721 driver = parent->driver; 6706 driver = parent->driver;
6722 if (driver && driver->name) 6707 if (driver && driver->name)
6723 return driver->name; 6708 return driver->name;
6724 return empty; 6709 return empty;
6725 } 6710 }
6726 6711
6727 static int __netdev_printk(const char *level, const struct net_device *dev, 6712 static int __netdev_printk(const char *level, const struct net_device *dev,
6728 struct va_format *vaf) 6713 struct va_format *vaf)
6729 { 6714 {
6730 int r; 6715 int r;
6731 6716
6732 if (dev && dev->dev.parent) { 6717 if (dev && dev->dev.parent) {
6733 r = dev_printk_emit(level[1] - '0', 6718 r = dev_printk_emit(level[1] - '0',
6734 dev->dev.parent, 6719 dev->dev.parent,
6735 "%s %s %s: %pV", 6720 "%s %s %s: %pV",
6736 dev_driver_string(dev->dev.parent), 6721 dev_driver_string(dev->dev.parent),
6737 dev_name(dev->dev.parent), 6722 dev_name(dev->dev.parent),
6738 netdev_name(dev), vaf); 6723 netdev_name(dev), vaf);
6739 } else if (dev) { 6724 } else if (dev) {
6740 r = printk("%s%s: %pV", level, netdev_name(dev), vaf); 6725 r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
6741 } else { 6726 } else {
6742 r = printk("%s(NULL net_device): %pV", level, vaf); 6727 r = printk("%s(NULL net_device): %pV", level, vaf);
6743 } 6728 }
6744 6729
6745 return r; 6730 return r;
6746 } 6731 }
6747 6732
6748 int netdev_printk(const char *level, const struct net_device *dev, 6733 int netdev_printk(const char *level, const struct net_device *dev,
6749 const char *format, ...) 6734 const char *format, ...)
6750 { 6735 {
6751 struct va_format vaf; 6736 struct va_format vaf;
6752 va_list args; 6737 va_list args;
6753 int r; 6738 int r;
6754 6739
6755 va_start(args, format); 6740 va_start(args, format);
6756 6741
6757 vaf.fmt = format; 6742 vaf.fmt = format;
6758 vaf.va = &args; 6743 vaf.va = &args;
6759 6744
6760 r = __netdev_printk(level, dev, &vaf); 6745 r = __netdev_printk(level, dev, &vaf);
6761 6746
6762 va_end(args); 6747 va_end(args);
6763 6748
6764 return r; 6749 return r;
6765 } 6750 }
6766 EXPORT_SYMBOL(netdev_printk); 6751 EXPORT_SYMBOL(netdev_printk);
6767 6752
6768 #define define_netdev_printk_level(func, level) \ 6753 #define define_netdev_printk_level(func, level) \
6769 int func(const struct net_device *dev, const char *fmt, ...) \ 6754 int func(const struct net_device *dev, const char *fmt, ...) \
6770 { \ 6755 { \
6771 int r; \ 6756 int r; \
6772 struct va_format vaf; \ 6757 struct va_format vaf; \
6773 va_list args; \ 6758 va_list args; \
6774 \ 6759 \
6775 va_start(args, fmt); \ 6760 va_start(args, fmt); \
6776 \ 6761 \
6777 vaf.fmt = fmt; \ 6762 vaf.fmt = fmt; \
6778 vaf.va = &args; \ 6763 vaf.va = &args; \
6779 \ 6764 \
6780 r = __netdev_printk(level, dev, &vaf); \ 6765 r = __netdev_printk(level, dev, &vaf); \
6781 \ 6766 \
6782 va_end(args); \ 6767 va_end(args); \
6783 \ 6768 \
6784 return r; \ 6769 return r; \
6785 } \ 6770 } \
6786 EXPORT_SYMBOL(func); 6771 EXPORT_SYMBOL(func);
6787 6772
6788 define_netdev_printk_level(netdev_emerg, KERN_EMERG); 6773 define_netdev_printk_level(netdev_emerg, KERN_EMERG);
6789 define_netdev_printk_level(netdev_alert, KERN_ALERT); 6774 define_netdev_printk_level(netdev_alert, KERN_ALERT);
6790 define_netdev_printk_level(netdev_crit, KERN_CRIT); 6775 define_netdev_printk_level(netdev_crit, KERN_CRIT);
6791 define_netdev_printk_level(netdev_err, KERN_ERR); 6776 define_netdev_printk_level(netdev_err, KERN_ERR);
6792 define_netdev_printk_level(netdev_warn, KERN_WARNING); 6777 define_netdev_printk_level(netdev_warn, KERN_WARNING);
6793 define_netdev_printk_level(netdev_notice, KERN_NOTICE); 6778 define_netdev_printk_level(netdev_notice, KERN_NOTICE);
6794 define_netdev_printk_level(netdev_info, KERN_INFO); 6779 define_netdev_printk_level(netdev_info, KERN_INFO);
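
The seven helpers generated above are what drivers actually call. A minimal usage sketch (hypothetical foo_open handler, shown for illustration only, not part of this commit):

	static int foo_open(struct net_device *dev)
	{
		if (!netif_carrier_ok(dev))
			netdev_warn(dev, "no carrier yet\n");

		/* with a parent device this prints
		 * "<driver> <bus id> <ifname>: interface opened"
		 */
		netdev_info(dev, "interface opened\n");
		return 0;
	}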
6795 6780
6796 static void __net_exit netdev_exit(struct net *net) 6781 static void __net_exit netdev_exit(struct net *net)
6797 { 6782 {
6798 kfree(net->dev_name_head); 6783 kfree(net->dev_name_head);
6799 kfree(net->dev_index_head); 6784 kfree(net->dev_index_head);
6800 } 6785 }
6801 6786
6802 static struct pernet_operations __net_initdata netdev_net_ops = { 6787 static struct pernet_operations __net_initdata netdev_net_ops = {
6803 .init = netdev_init, 6788 .init = netdev_init,
6804 .exit = netdev_exit, 6789 .exit = netdev_exit,
6805 }; 6790 };
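
For reference, any other subsystem plugs into the same per-namespace init/exit machinery in the same way as netdev_net_ops above; a minimal, self-contained sketch (hypothetical foo_net_ops module, not part of this file):

	#include <linux/module.h>
	#include <net/net_namespace.h>

	static int __net_init foo_net_init(struct net *net)
	{
		/* allocate per-namespace state here */
		return 0;
	}

	static void __net_exit foo_net_exit(struct net *net)
	{
		/* release per-namespace state here */
	}

	static struct pernet_operations foo_net_ops = {
		.init = foo_net_init,
		.exit = foo_net_exit,
	};

	static int __init foo_init(void)
	{
		return register_pernet_subsys(&foo_net_ops);
	}

	static void __exit foo_exit(void)
	{
		unregister_pernet_subsys(&foo_net_ops);
	}

	module_init(foo_init);
	module_exit(foo_exit);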
6806 6791
6807 static void __net_exit default_device_exit(struct net *net) 6792 static void __net_exit default_device_exit(struct net *net)
6808 { 6793 {
6809 struct net_device *dev, *aux; 6794 struct net_device *dev, *aux;
6810 /* 6795 /*
6811 * Push all migratable network devices back to the 6796 * Push all migratable network devices back to the
6812 * initial network namespace 6797 * initial network namespace
6813 */ 6798 */
6814 rtnl_lock(); 6799 rtnl_lock();
6815 for_each_netdev_safe(net, dev, aux) { 6800 for_each_netdev_safe(net, dev, aux) {
6816 int err; 6801 int err;
6817 char fb_name[IFNAMSIZ]; 6802 char fb_name[IFNAMSIZ];
6818 6803
6819 /* Ignore unmoveable devices (i.e. loopback) */ 6804 /* Ignore unmoveable devices (i.e. loopback) */
6820 if (dev->features & NETIF_F_NETNS_LOCAL) 6805 if (dev->features & NETIF_F_NETNS_LOCAL)
6821 continue; 6806 continue;
6822 6807
6823 /* Leave virtual devices for the generic cleanup */ 6808 /* Leave virtual devices for the generic cleanup */
6824 if (dev->rtnl_link_ops) 6809 if (dev->rtnl_link_ops)
6825 continue; 6810 continue;
6826 6811
6827 /* Push remaining network devices to init_net */ 6812 /* Push remaining network devices to init_net */
6828 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); 6813 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
6829 err = dev_change_net_namespace(dev, &init_net, fb_name); 6814 err = dev_change_net_namespace(dev, &init_net, fb_name);
6830 if (err) { 6815 if (err) {
6831 pr_emerg("%s: failed to move %s to init_net: %d\n", 6816 pr_emerg("%s: failed to move %s to init_net: %d\n",
6832 __func__, dev->name, err); 6817 __func__, dev->name, err);
6833 BUG(); 6818 BUG();
6834 } 6819 }
6835 } 6820 }
6836 rtnl_unlock(); 6821 rtnl_unlock();
6837 } 6822 }
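
The NETIF_F_NETNS_LOCAL test above is what keeps loopback-like devices out of this migration. A device opts in from its setup routine, roughly like this (hypothetical foo_setup, illustrative only):

	#include <linux/etherdevice.h>

	static void foo_setup(struct net_device *dev)
	{
		ether_setup(dev);
		/* never allow this device to change network namespace */
		dev->features |= NETIF_F_NETNS_LOCAL;
	}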
6838 6823
6839 static void __net_exit rtnl_lock_unregistering(struct list_head *net_list) 6824 static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
6840 { 6825 {
6841 /* Return with the rtnl_lock held when there are no network 6826 /* Return with the rtnl_lock held when there are no network
6842 * devices unregistering in any network namespace in net_list. 6827 * devices unregistering in any network namespace in net_list.
6843 */ 6828 */
6844 struct net *net; 6829 struct net *net;
6845 bool unregistering; 6830 bool unregistering;
6846 DEFINE_WAIT(wait); 6831 DEFINE_WAIT(wait);
6847 6832
6848 for (;;) { 6833 for (;;) {
6849 prepare_to_wait(&netdev_unregistering_wq, &wait, 6834 prepare_to_wait(&netdev_unregistering_wq, &wait,
6850 TASK_UNINTERRUPTIBLE); 6835 TASK_UNINTERRUPTIBLE);
6851 unregistering = false; 6836 unregistering = false;
6852 rtnl_lock(); 6837 rtnl_lock();
6853 list_for_each_entry(net, net_list, exit_list) { 6838 list_for_each_entry(net, net_list, exit_list) {
6854 if (net->dev_unreg_count > 0) { 6839 if (net->dev_unreg_count > 0) {
6855 unregistering = true; 6840 unregistering = true;
6856 break; 6841 break;
6857 } 6842 }
6858 } 6843 }
6859 if (!unregistering) 6844 if (!unregistering)
6860 break; 6845 break;
6861 __rtnl_unlock(); 6846 __rtnl_unlock();
6862 schedule(); 6847 schedule();
6863 } 6848 }
6864 finish_wait(&netdev_unregistering_wq, &wait); 6849 finish_wait(&netdev_unregistering_wq, &wait);
6865 } 6850 }
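
rtnl_lock_unregistering() is the standard prepare_to_wait()/schedule()/finish_wait() sleep loop, with the extra twist that it drops the rtnl lock before sleeping. The bare idiom, stripped of the rtnl handling, looks roughly like this (hypothetical wait_for_zero helper, for illustration only):

	static void wait_for_zero(wait_queue_head_t *wq, atomic_t *pending)
	{
		DEFINE_WAIT(wait);

		for (;;) {
			prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
			if (!atomic_read(pending))
				break;
			schedule();
		}
		finish_wait(wq, &wait);
	}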
6866 6851
6867 static void __net_exit default_device_exit_batch(struct list_head *net_list) 6852 static void __net_exit default_device_exit_batch(struct list_head *net_list)
6868 { 6853 {
6869 /* At exit all network devices must be removed from a network 6854 /* At exit all network devices must be removed from a network
6870 * namespace. Do this in the reverse order of registration. 6855 * namespace. Do this in the reverse order of registration.
6871 * Do this across as many network namespaces as possible to 6856 * Do this across as many network namespaces as possible to
6872 * improve batching efficiency. 6857 * improve batching efficiency.
6873 */ 6858 */
6874 struct net_device *dev; 6859 struct net_device *dev;
6875 struct net *net; 6860 struct net *net;
6876 LIST_HEAD(dev_kill_list); 6861 LIST_HEAD(dev_kill_list);
6877 6862
6878 /* To prevent network device cleanup code from dereferencing 6863 /* To prevent network device cleanup code from dereferencing
6879 * loopback devices or network devices that have been freed, 6864 * loopback devices or network devices that have been freed,
6880 * wait here for all pending unregistrations to complete, 6865 * wait here for all pending unregistrations to complete,
6881 * before unregistering the loopback device and allowing the 6866 * before unregistering the loopback device and allowing the
6882 * network namespace to be freed. 6867 * network namespace to be freed.
6883 * 6868 *
6884 * The netdev todo list containing all network devices 6869 * The netdev todo list containing all network devices
6885 * unregistrations that happen in default_device_exit_batch 6870 * unregistrations that happen in default_device_exit_batch
6886 * will run in the rtnl_unlock() at the end of 6871 * will run in the rtnl_unlock() at the end of
6887 * default_device_exit_batch. 6872 * default_device_exit_batch.
6888 */ 6873 */
6889 rtnl_lock_unregistering(net_list); 6874 rtnl_lock_unregistering(net_list);
6890 list_for_each_entry(net, net_list, exit_list) { 6875 list_for_each_entry(net, net_list, exit_list) {
6891 for_each_netdev_reverse(net, dev) { 6876 for_each_netdev_reverse(net, dev) {
6892 if (dev->rtnl_link_ops) 6877 if (dev->rtnl_link_ops)
6893 dev->rtnl_link_ops->dellink(dev, &dev_kill_list); 6878 dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
6894 else 6879 else
6895 unregister_netdevice_queue(dev, &dev_kill_list); 6880 unregister_netdevice_queue(dev, &dev_kill_list);
6896 } 6881 }
6897 } 6882 }
6898 unregister_netdevice_many(&dev_kill_list); 6883 unregister_netdevice_many(&dev_kill_list);
6899 list_del(&dev_kill_list); 6884 list_del(&dev_kill_list);
6900 rtnl_unlock(); 6885 rtnl_unlock();
6901 } 6886 }
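
The queue-then-flush batching used above is available to any caller that tears down several devices at once; a minimal sketch (hypothetical foo_destroy_all helper, not taken from this file):

	static void foo_destroy_all(struct net *net)
	{
		struct net_device *dev;
		LIST_HEAD(kill_list);

		rtnl_lock();
		for_each_netdev(net, dev)
			unregister_netdevice_queue(dev, &kill_list);
		unregister_netdevice_many(&kill_list);
		rtnl_unlock();
	}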
6902 6887
6903 static struct pernet_operations __net_initdata default_device_ops = { 6888 static struct pernet_operations __net_initdata default_device_ops = {
6904 .exit = default_device_exit, 6889 .exit = default_device_exit,
6905 .exit_batch = default_device_exit_batch, 6890 .exit_batch = default_device_exit_batch,
6906 }; 6891 };
6907 6892
6908 /* 6893 /*
6909 * Initialize the DEV module. At boot time this walks the device list and 6894 * Initialize the DEV module. At boot time this walks the device list and
6910 * unhooks any devices that fail to initialise (normally hardware not 6895 * unhooks any devices that fail to initialise (normally hardware not
6911 * present) and leaves us with a valid list of present and active devices. 6896 * present) and leaves us with a valid list of present and active devices.
6912 * 6897 *
6913 */ 6898 */
6914 6899
6915 /* 6900 /*
6916 * This is called single threaded during boot, so no need 6901 * This is called single threaded during boot, so no need
6917 * to take the rtnl semaphore. 6902 * to take the rtnl semaphore.
6918 */ 6903 */
6919 static int __init net_dev_init(void) 6904 static int __init net_dev_init(void)
6920 { 6905 {
6921 int i, rc = -ENOMEM; 6906 int i, rc = -ENOMEM;
6922 6907
6923 BUG_ON(!dev_boot_phase); 6908 BUG_ON(!dev_boot_phase);
6924 6909
6925 if (dev_proc_init()) 6910 if (dev_proc_init())
6926 goto out; 6911 goto out;
6927 6912
6928 if (netdev_kobject_init()) 6913 if (netdev_kobject_init())
6929 goto out; 6914 goto out;
6930 6915
6931 INIT_LIST_HEAD(&ptype_all); 6916 INIT_LIST_HEAD(&ptype_all);
6932 for (i = 0; i < PTYPE_HASH_SIZE; i++) 6917 for (i = 0; i < PTYPE_HASH_SIZE; i++)
6933 INIT_LIST_HEAD(&ptype_base[i]); 6918 INIT_LIST_HEAD(&ptype_base[i]);
6934 6919
6935 INIT_LIST_HEAD(&offload_base); 6920 INIT_LIST_HEAD(&offload_base);
6936 6921
6937 if (register_pernet_subsys(&netdev_net_ops)) 6922 if (register_pernet_subsys(&netdev_net_ops))
6938 goto out; 6923 goto out;
6939 6924
6940 /* 6925 /*
6941 * Initialise the packet receive queues. 6926 * Initialise the packet receive queues.
6942 */ 6927 */
6943 6928
6944 for_each_possible_cpu(i) { 6929 for_each_possible_cpu(i) {
6945 struct softnet_data *sd = &per_cpu(softnet_data, i); 6930 struct softnet_data *sd = &per_cpu(softnet_data, i);
6946 6931
6947 memset(sd, 0, sizeof(*sd)); 6932 memset(sd, 0, sizeof(*sd));
6948 skb_queue_head_init(&sd->input_pkt_queue); 6933 skb_queue_head_init(&sd->input_pkt_queue);
6949 skb_queue_head_init(&sd->process_queue); 6934 skb_queue_head_init(&sd->process_queue);
6950 sd->completion_queue = NULL; 6935 sd->completion_queue = NULL;
6951 INIT_LIST_HEAD(&sd->poll_list); 6936 INIT_LIST_HEAD(&sd->poll_list);
6952 sd->output_queue = NULL; 6937 sd->output_queue = NULL;
6953 sd->output_queue_tailp = &sd->output_queue; 6938 sd->output_queue_tailp = &sd->output_queue;
6954 #ifdef CONFIG_RPS 6939 #ifdef CONFIG_RPS
6955 sd->csd.func = rps_trigger_softirq; 6940 sd->csd.func = rps_trigger_softirq;
6956 sd->csd.info = sd; 6941 sd->csd.info = sd;
6957 sd->csd.flags = 0; 6942 sd->csd.flags = 0;
6958 sd->cpu = i; 6943 sd->cpu = i;
6959 #endif 6944 #endif
6960 6945
6961 sd->backlog.poll = process_backlog; 6946 sd->backlog.poll = process_backlog;
6962 sd->backlog.weight = weight_p; 6947 sd->backlog.weight = weight_p;
6963 sd->backlog.gro_list = NULL; 6948 sd->backlog.gro_list = NULL;
6964 sd->backlog.gro_count = 0; 6949 sd->backlog.gro_count = 0;
6965 6950
6966 #ifdef CONFIG_NET_FLOW_LIMIT 6951 #ifdef CONFIG_NET_FLOW_LIMIT
6967 sd->flow_limit = NULL; 6952 sd->flow_limit = NULL;
6968 #endif 6953 #endif
6969 } 6954 }
6970 6955
6971 dev_boot_phase = 0; 6956 dev_boot_phase = 0;
6972 6957
6973 /* The loopback device is special: if any other network device 6958 /* The loopback device is special: if any other network device
6974 * is present in a network namespace, the loopback device must 6959 * is present in a network namespace, the loopback device must
6975 * be present. Since we now dynamically allocate and free the 6960 * be present. Since we now dynamically allocate and free the
6976 * loopback device, ensure this invariant is maintained by 6961 * loopback device, ensure this invariant is maintained by
6977 * keeping the loopback device as the first device on the 6962 * keeping the loopback device as the first device on the
6978 * list of network devices: the loopback device is the first 6963 * list of network devices: the loopback device is the first
6979 * device that appears and the last network device that 6964 * device that appears and the last network device that
6980 * disappears. 6965 * disappears.
6981 */ 6966 */
6982 if (register_pernet_device(&loopback_net_ops)) 6967 if (register_pernet_device(&loopback_net_ops))
6983 goto out; 6968 goto out;
6984 6969
6985 if (register_pernet_device(&default_device_ops)) 6970 if (register_pernet_device(&default_device_ops))
6986 goto out; 6971 goto out;
6987 6972
6988 open_softirq(NET_TX_SOFTIRQ, net_tx_action); 6973 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
6989 open_softirq(NET_RX_SOFTIRQ, net_rx_action); 6974 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
6990 6975
6991 hotcpu_notifier(dev_cpu_callback, 0); 6976 hotcpu_notifier(dev_cpu_callback, 0);
6992 dst_init(); 6977 dst_init();
6993 rc = 0; 6978 rc = 0;
6994 out: 6979 out:
6995 return rc; 6980 return rc;
6996 } 6981 }
6997 6982
6998 subsys_initcall(net_dev_init); 6983 subsys_initcall(net_dev_init);
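
The per-CPU initialisation loop in net_dev_init() above follows the usual for_each_possible_cpu()/per_cpu() pattern; in generic form it looks roughly like this (hypothetical foo_count variable, illustrative only):

	DEFINE_PER_CPU(unsigned long, foo_count);

	static void foo_reset_counts(void)
	{
		int cpu;

		for_each_possible_cpu(cpu)
			per_cpu(foo_count, cpu) = 0;
	}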
6999 6984