Commit 8e3bff96afa67369008153f3326fa5ce985cabab

Authored by Stephen Hemminger
Committed by David S. Miller
1 parent 22a9321614

net: more spelling fixes

Various spelling fixes in networking stack

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 5 changed files with 11 additions and 11 deletions

net/bridge/br_netlink.c
/*
 * Bridge netlink control interface
 *
 * Authors:
 * Stephen Hemminger <shemminger@osdl.org>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/etherdevice.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <uapi/linux/if_bridge.h>

#include "br_private.h"
#include "br_private_stp.h"

static inline size_t br_port_info_size(void)
{
        return nla_total_size(1)        /* IFLA_BRPORT_STATE */
                + nla_total_size(2)     /* IFLA_BRPORT_PRIORITY */
                + nla_total_size(4)     /* IFLA_BRPORT_COST */
                + nla_total_size(1)     /* IFLA_BRPORT_MODE */
                + nla_total_size(1)     /* IFLA_BRPORT_GUARD */
                + nla_total_size(1)     /* IFLA_BRPORT_PROTECT */
                + nla_total_size(1)     /* IFLA_BRPORT_FAST_LEAVE */
                + nla_total_size(1)     /* IFLA_BRPORT_LEARNING */
                + nla_total_size(1)     /* IFLA_BRPORT_UNICAST_FLOOD */
                + 0;
}

static inline size_t br_nlmsg_size(void)
{
        return NLMSG_ALIGN(sizeof(struct ifinfomsg))
                + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
                + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
                + nla_total_size(4) /* IFLA_MASTER */
                + nla_total_size(4) /* IFLA_MTU */
                + nla_total_size(4) /* IFLA_LINK */
                + nla_total_size(1) /* IFLA_OPERSTATE */
                + nla_total_size(br_port_info_size()); /* IFLA_PROTINFO */
}
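The two sizing helpers above compute a worst-case netlink message size: nla_total_size() is the attribute payload plus the 4-byte attribute header, rounded up to a 4-byte boundary, so the u8, u16 and u32 attributes each account for 8 bytes. A minimal user-space sketch of the same arithmetic, using only the NLA_ALIGN/NLA_HDRLEN macros from the uapi netlink header (illustrative only, not part of this commit):

/* Standalone sketch of the nla_total_size() arithmetic used above:
 * every attribute is NLA_HDRLEN (4 bytes) of header plus the payload,
 * rounded up to a 4-byte boundary.  Compiles as a normal user program.
 */
#include <stdio.h>
#include <linux/netlink.h>      /* NLA_HDRLEN, NLA_ALIGN */

static size_t total_size(int payload)
{
        return NLA_ALIGN(NLA_HDRLEN + payload); /* mirrors nla_total_size() */
}

int main(void)
{
        /* u8, u16 and u32 attributes all occupy 8 bytes on the wire */
        printf("u8  -> %zu\n", total_size(1));
        printf("u16 -> %zu\n", total_size(2));
        printf("u32 -> %zu\n", total_size(4));
        return 0;
}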

static int br_port_fill_attrs(struct sk_buff *skb,
                              const struct net_bridge_port *p)
{
        u8 mode = !!(p->flags & BR_HAIRPIN_MODE);

        if (nla_put_u8(skb, IFLA_BRPORT_STATE, p->state) ||
            nla_put_u16(skb, IFLA_BRPORT_PRIORITY, p->priority) ||
            nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) ||
            nla_put_u8(skb, IFLA_BRPORT_MODE, mode) ||
            nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) ||
            nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) ||
            nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) ||
            nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) ||
            nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)))
                return -EMSGSIZE;

        return 0;
}

/*
 * Create one netlink message for one interface
 * Contains port and master info as well as carrier and bridge state.
 */
static int br_fill_ifinfo(struct sk_buff *skb,
                          const struct net_bridge_port *port,
                          u32 pid, u32 seq, int event, unsigned int flags,
                          u32 filter_mask, const struct net_device *dev)
{
        const struct net_bridge *br;
        struct ifinfomsg *hdr;
        struct nlmsghdr *nlh;
        u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;

        if (port)
                br = port->br;
        else
                br = netdev_priv(dev);

        br_debug(br, "br_fill_info event %d port %s master %s\n",
                 event, dev->name, br->dev->name);

        nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        hdr = nlmsg_data(nlh);
        hdr->ifi_family = AF_BRIDGE;
        hdr->__ifi_pad = 0;
        hdr->ifi_type = dev->type;
        hdr->ifi_index = dev->ifindex;
        hdr->ifi_flags = dev_get_flags(dev);
        hdr->ifi_change = 0;

        if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
            nla_put_u32(skb, IFLA_MASTER, br->dev->ifindex) ||
            nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
            nla_put_u8(skb, IFLA_OPERSTATE, operstate) ||
            (dev->addr_len &&
             nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
            (dev->ifindex != dev->iflink &&
             nla_put_u32(skb, IFLA_LINK, dev->iflink)))
                goto nla_put_failure;

        if (event == RTM_NEWLINK && port) {
                struct nlattr *nest
                        = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED);

                if (nest == NULL || br_port_fill_attrs(skb, port) < 0)
                        goto nla_put_failure;
                nla_nest_end(skb, nest);
        }

        /* Check if the VID information is requested */
        if (filter_mask & RTEXT_FILTER_BRVLAN) {
                struct nlattr *af;
                const struct net_port_vlans *pv;
                struct bridge_vlan_info vinfo;
                u16 vid;
                u16 pvid;

                if (port)
                        pv = nbp_get_vlan_info(port);
                else
                        pv = br_get_vlan_info(br);

                if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID))
                        goto done;

                af = nla_nest_start(skb, IFLA_AF_SPEC);
                if (!af)
                        goto nla_put_failure;

                pvid = br_get_pvid(pv);
                for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
                        vinfo.vid = vid;
                        vinfo.flags = 0;
                        if (vid == pvid)
                                vinfo.flags |= BRIDGE_VLAN_INFO_PVID;

                        if (test_bit(vid, pv->untagged_bitmap))
                                vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED;

                        if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO,
                                    sizeof(vinfo), &vinfo))
                                goto nla_put_failure;
                }

                nla_nest_end(skb, af);
        }

done:
        return nlmsg_end(skb, nlh);

nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}

/*
 * Notify listeners of a change in port information
 */
void br_ifinfo_notify(int event, struct net_bridge_port *port)
{
        struct net *net;
        struct sk_buff *skb;
        int err = -ENOBUFS;

        if (!port)
                return;

        net = dev_net(port->dev);
        br_debug(port->br, "port %u(%s) event %d\n",
                 (unsigned int)port->port_no, port->dev->name, event);

        skb = nlmsg_new(br_nlmsg_size(), GFP_ATOMIC);
        if (skb == NULL)
                goto errout;

        err = br_fill_ifinfo(skb, port, 0, 0, event, 0, 0, port->dev);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in br_nlmsg_size() */
                WARN_ON(err == -EMSGSIZE);
                kfree_skb(skb);
                goto errout;
        }
        rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
        return;
errout:
        if (err < 0)
                rtnl_set_sk_err(net, RTNLGRP_LINK, err);
}


/*
 * Dump information about all ports, in response to GETLINK
 */
int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
               struct net_device *dev, u32 filter_mask)
{
        int err = 0;
        struct net_bridge_port *port = br_port_get_rtnl(dev);

        /* not a bridge port and */
        if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN))
                goto out;

        err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI,
                             filter_mask, dev);
out:
        return err;
}
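br_getlink() answers RTM_GETLINK dump requests whose ifinfomsg family is AF_BRIDGE; passing RTEXT_FILTER_BRVLAN in IFLA_EXT_MASK additionally asks for the per-port VLAN table built by br_fill_ifinfo(). The following user-space sketch shows one plausible way to issue such a dump over a raw NETLINK_ROUTE socket; the fixed request layout, the buffer size and the minimal error handling are assumptions for illustration, not part of this commit:

/* Hedged user-space sketch: dump AF_BRIDGE link info, which is served by
 * br_getlink() above for each bridge port.  Error handling is trimmed.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/if_link.h>

int main(void)
{
        struct {
                struct nlmsghdr nlh;
                struct ifinfomsg ifm;
                struct rtattr ext_req;          /* IFLA_EXT_MASK */
                __u32 ext_filter_mask;
        } req;
        struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
        char buf[16384];
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

        if (fd < 0)
                return 1;

        memset(&req, 0, sizeof(req));
        req.nlh.nlmsg_len = sizeof(req);
        req.nlh.nlmsg_type = RTM_GETLINK;
        req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
        req.ifm.ifi_family = AF_BRIDGE;         /* route the dump to br_getlink() */
        req.ext_req.rta_type = IFLA_EXT_MASK;
        req.ext_req.rta_len = RTA_LENGTH(sizeof(__u32));
        req.ext_filter_mask = RTEXT_FILTER_BRVLAN; /* also ask for VLAN info */

        sendto(fd, &req, sizeof(req), 0, (struct sockaddr *)&sa, sizeof(sa));

        for (;;) {
                ssize_t len = recv(fd, buf, sizeof(buf), 0);
                struct nlmsghdr *nlh = (struct nlmsghdr *)buf;

                if (len <= 0)
                        break;
                for (; NLMSG_OK(nlh, len); nlh = NLMSG_NEXT(nlh, len)) {
                        if (nlh->nlmsg_type == NLMSG_DONE)
                                goto out;
                        if (nlh->nlmsg_type == RTM_NEWLINK) {
                                struct ifinfomsg *ifm = NLMSG_DATA(nlh);

                                /* walk IFLA_* attributes here if needed */
                                printf("ifindex %d\n", ifm->ifi_index);
                        }
                }
        }
out:
        close(fd);
        return 0;
}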

static const struct nla_policy ifla_br_policy[IFLA_MAX+1] = {
        [IFLA_BRIDGE_FLAGS]     = { .type = NLA_U16 },
        [IFLA_BRIDGE_MODE]      = { .type = NLA_U16 },
        [IFLA_BRIDGE_VLAN_INFO] = { .type = NLA_BINARY,
                                    .len = sizeof(struct bridge_vlan_info), },
};

static int br_afspec(struct net_bridge *br,
                     struct net_bridge_port *p,
                     struct nlattr *af_spec,
                     int cmd)
{
        struct nlattr *tb[IFLA_BRIDGE_MAX+1];
        int err = 0;

        err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy);
        if (err)
                return err;

        if (tb[IFLA_BRIDGE_VLAN_INFO]) {
                struct bridge_vlan_info *vinfo;

                vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]);

                if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
                        return -EINVAL;

                switch (cmd) {
                case RTM_SETLINK:
                        if (p) {
                                err = nbp_vlan_add(p, vinfo->vid, vinfo->flags);
                                if (err)
                                        break;

                                if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
                                        err = br_vlan_add(p->br, vinfo->vid,
                                                          vinfo->flags);
                        } else
                                err = br_vlan_add(br, vinfo->vid, vinfo->flags);

                        if (err)
                                break;

                        break;

                case RTM_DELLINK:
                        if (p) {
                                nbp_vlan_delete(p, vinfo->vid);
                                if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
                                        br_vlan_delete(p->br, vinfo->vid);
                        } else
                                br_vlan_delete(br, vinfo->vid);
                        break;
                }
        }

        return err;
}
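For the RTM_SETLINK direction, br_afspec() expects an IFLA_AF_SPEC attribute carrying IFLA_BRIDGE_VLAN_INFO, i.e. a struct bridge_vlan_info. A hedged companion sketch that builds and sends such a request to add a VLAN on one port; the fixed-size message layout, the example ifindex and the flag choices are illustrative assumptions, not part of this commit:

/* Hedged sketch: add VLAN 100 as PVID/untagged on one bridge port via the
 * RTM_SETLINK path that ends up in br_afspec().  Error/ack handling trimmed.
 */
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/if_link.h>
#include <linux/if_bridge.h>

struct vlan_req {
        struct nlmsghdr nlh;
        struct ifinfomsg ifm;
        struct rtattr afspec;                   /* IFLA_AF_SPEC */
        struct rtattr vinfo_attr;               /* IFLA_BRIDGE_VLAN_INFO */
        struct bridge_vlan_info vinfo;
};

static void build_vlan_add(struct vlan_req *req, int port_ifindex, __u16 vid)
{
        memset(req, 0, sizeof(*req));
        req->nlh.nlmsg_len = sizeof(*req);
        req->nlh.nlmsg_type = RTM_SETLINK;
        req->nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
        req->ifm.ifi_family = AF_BRIDGE;
        req->ifm.ifi_index = port_ifindex;

        req->afspec.rta_type = IFLA_AF_SPEC;
        req->afspec.rta_len = RTA_LENGTH(sizeof(req->vinfo_attr) +
                                         sizeof(req->vinfo));
        req->vinfo_attr.rta_type = IFLA_BRIDGE_VLAN_INFO;
        req->vinfo_attr.rta_len = RTA_LENGTH(sizeof(req->vinfo));
        req->vinfo.vid = vid;
        req->vinfo.flags = BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED;
}

int main(void)
{
        struct vlan_req req;
        struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

        if (fd < 0)
                return 1;
        build_vlan_add(&req, 4 /* example port ifindex */, 100);
        sendto(fd, &req, sizeof(req), 0, (struct sockaddr *)&sa, sizeof(sa));
        /* a real tool would read back the NLMSG_ERROR ack here */
        close(fd);
        return 0;
}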

static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = {
        [IFLA_BRPORT_STATE]     = { .type = NLA_U8 },
        [IFLA_BRPORT_COST]      = { .type = NLA_U32 },
        [IFLA_BRPORT_PRIORITY]  = { .type = NLA_U16 },
        [IFLA_BRPORT_MODE]      = { .type = NLA_U8 },
        [IFLA_BRPORT_GUARD]     = { .type = NLA_U8 },
        [IFLA_BRPORT_PROTECT]   = { .type = NLA_U8 },
        [IFLA_BRPORT_LEARNING]  = { .type = NLA_U8 },
        [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 },
};

/* Change the state of the port and notify spanning tree */
static int br_set_port_state(struct net_bridge_port *p, u8 state)
{
        if (state > BR_STATE_BLOCKING)
                return -EINVAL;

        /* if kernel STP is running, don't allow changes */
        if (p->br->stp_enabled == BR_KERNEL_STP)
                return -EBUSY;

        /* if device is not up, change is not allowed
         * if link is not present, only allowable state is disabled
         */
        if (!netif_running(p->dev) ||
            (!netif_oper_up(p->dev) && state != BR_STATE_DISABLED))
                return -ENETDOWN;

        p->state = state;
        br_log_state(p);
        br_port_state_selection(p->br);
        return 0;
}

/* Set/clear or port flags based on attribute */
static void br_set_port_flag(struct net_bridge_port *p, struct nlattr *tb[],
                             int attrtype, unsigned long mask)
{
        if (tb[attrtype]) {
                u8 flag = nla_get_u8(tb[attrtype]);
                if (flag)
                        p->flags |= mask;
                else
                        p->flags &= ~mask;
        }
}

/* Process bridge protocol info on port */
static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
{
        int err;

        br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE);
        br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD);
        br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE);
        br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK);
        br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING);
        br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD);

        if (tb[IFLA_BRPORT_COST]) {
                err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST]));
                if (err)
                        return err;
        }

        if (tb[IFLA_BRPORT_PRIORITY]) {
                err = br_stp_set_port_priority(p, nla_get_u16(tb[IFLA_BRPORT_PRIORITY]));
                if (err)
                        return err;
        }

        if (tb[IFLA_BRPORT_STATE]) {
                err = br_set_port_state(p, nla_get_u8(tb[IFLA_BRPORT_STATE]));
                if (err)
                        return err;
        }
        return 0;
}

/* Change state and parameters on port. */
int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
{
        struct nlattr *protinfo;
        struct nlattr *afspec;
        struct net_bridge_port *p;
        struct nlattr *tb[IFLA_BRPORT_MAX + 1];
        int err = 0;

        protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO);
        afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
        if (!protinfo && !afspec)
                return 0;

        p = br_port_get_rtnl(dev);
        /* We want to accept dev as bridge itself if the AF_SPEC
-        * is set to see if someone is setting vlan info on the brigde
+        * is set to see if someone is setting vlan info on the bridge
         */
        if (!p && !afspec)
                return -EINVAL;

        if (p && protinfo) {
                if (protinfo->nla_type & NLA_F_NESTED) {
                        err = nla_parse_nested(tb, IFLA_BRPORT_MAX,
                                               protinfo, ifla_brport_policy);
                        if (err)
                                return err;

                        spin_lock_bh(&p->br->lock);
                        err = br_setport(p, tb);
                        spin_unlock_bh(&p->br->lock);
                } else {
-                       /* Binary compatability with old RSTP */
+                       /* Binary compatibility with old RSTP */
                        if (nla_len(protinfo) < sizeof(u8))
                                return -EINVAL;

                        spin_lock_bh(&p->br->lock);
                        err = br_set_port_state(p, nla_get_u8(protinfo));
                        spin_unlock_bh(&p->br->lock);
                }
                if (err)
                        goto out;
        }

        if (afspec) {
                err = br_afspec((struct net_bridge *)netdev_priv(dev), p,
                                afspec, RTM_SETLINK);
        }

        if (err == 0)
                br_ifinfo_notify(RTM_NEWLINK, p);

out:
        return err;
}

/* Delete port information */
int br_dellink(struct net_device *dev, struct nlmsghdr *nlh)
{
        struct nlattr *afspec;
        struct net_bridge_port *p;
        int err;

        afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
        if (!afspec)
                return 0;

        p = br_port_get_rtnl(dev);
        /* We want to accept dev as bridge itself as well */
        if (!p && !(dev->priv_flags & IFF_EBRIDGE))
                return -EINVAL;

        err = br_afspec((struct net_bridge *)netdev_priv(dev), p,
                        afspec, RTM_DELLINK);

        return err;
}
static int br_validate(struct nlattr *tb[], struct nlattr *data[])
{
        if (tb[IFLA_ADDRESS]) {
                if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
                        return -EINVAL;
                if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
                        return -EADDRNOTAVAIL;
        }

        return 0;
}

static size_t br_get_link_af_size(const struct net_device *dev)
{
        struct net_port_vlans *pv;

        if (br_port_exists(dev))
                pv = nbp_get_vlan_info(br_port_get_rtnl(dev));
        else if (dev->priv_flags & IFF_EBRIDGE)
                pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev));
        else
                return 0;

        if (!pv)
                return 0;

        /* Each VLAN is returned in bridge_vlan_info along with flags */
        return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info));
}

static struct rtnl_af_ops br_af_ops = {
        .family                 = AF_BRIDGE,
        .get_link_af_size       = br_get_link_af_size,
};

struct rtnl_link_ops br_link_ops __read_mostly = {
        .kind           = "bridge",
        .priv_size      = sizeof(struct net_bridge),
        .setup          = br_dev_setup,
        .validate       = br_validate,
        .dellink        = br_dev_delete,
};

int __init br_netlink_init(void)
{
        int err;

        br_mdb_init();
        err = rtnl_af_register(&br_af_ops);
        if (err)
                goto out;

        err = rtnl_link_register(&br_link_ops);
        if (err)
                goto out_af;

        return 0;

out_af:
        rtnl_af_unregister(&br_af_ops);
out:
        br_mdb_uninit();
        return err;
}

void __exit br_netlink_fini(void)
{
        br_mdb_uninit();
        rtnl_af_unregister(&br_af_ops);
        rtnl_link_unregister(&br_link_ops);
}

net/core/net-sysfs.c
/*
 * net-sysfs.c - network device class and attributes
 *
 * Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/slab.h>
#include <linux/nsproxy.h>
#include <net/sock.h>
#include <net/net_namespace.h>
#include <linux/rtnetlink.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/jiffies.h>
#include <linux/pm_runtime.h>

#include "net-sysfs.h"

#ifdef CONFIG_SYSFS
static const char fmt_hex[] = "%#x\n";
static const char fmt_long_hex[] = "%#lx\n";
static const char fmt_dec[] = "%d\n";
static const char fmt_udec[] = "%u\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";

static inline int dev_isalive(const struct net_device *dev)
{
        return dev->reg_state <= NETREG_REGISTERED;
}

/* use same locking rules as GIF* ioctl's */
static ssize_t netdev_show(const struct device *dev,
                           struct device_attribute *attr, char *buf,
                           ssize_t (*format)(const struct net_device *, char *))
{
        struct net_device *net = to_net_dev(dev);
        ssize_t ret = -EINVAL;

        read_lock(&dev_base_lock);
        if (dev_isalive(net))
                ret = (*format)(net, buf);
        read_unlock(&dev_base_lock);

        return ret;
}

/* generate a show function for simple field */
#define NETDEVICE_SHOW(field, format_string) \
static ssize_t format_##field(const struct net_device *net, char *buf) \
{ \
        return sprintf(buf, format_string, net->field); \
} \
static ssize_t field##_show(struct device *dev, \
                            struct device_attribute *attr, char *buf) \
{ \
        return netdev_show(dev, attr, buf, format_##field); \
} \

#define NETDEVICE_SHOW_RO(field, format_string) \
NETDEVICE_SHOW(field, format_string); \
static DEVICE_ATTR_RO(field)

#define NETDEVICE_SHOW_RW(field, format_string) \
NETDEVICE_SHOW(field, format_string); \
static DEVICE_ATTR_RW(field)
77 /* use same locking and permission rules as SIF* ioctl's */ 77 /* use same locking and permission rules as SIF* ioctl's */
78 static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, 78 static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
79 const char *buf, size_t len, 79 const char *buf, size_t len,
80 int (*set)(struct net_device *, unsigned long)) 80 int (*set)(struct net_device *, unsigned long))
81 { 81 {
82 struct net_device *netdev = to_net_dev(dev); 82 struct net_device *netdev = to_net_dev(dev);
83 struct net *net = dev_net(netdev); 83 struct net *net = dev_net(netdev);
84 unsigned long new; 84 unsigned long new;
85 int ret = -EINVAL; 85 int ret = -EINVAL;
86 86
87 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 87 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
88 return -EPERM; 88 return -EPERM;
89 89
90 ret = kstrtoul(buf, 0, &new); 90 ret = kstrtoul(buf, 0, &new);
91 if (ret) 91 if (ret)
92 goto err; 92 goto err;
93 93
94 if (!rtnl_trylock()) 94 if (!rtnl_trylock())
95 return restart_syscall(); 95 return restart_syscall();
96 96
97 if (dev_isalive(netdev)) { 97 if (dev_isalive(netdev)) {
98 if ((ret = (*set)(netdev, new)) == 0) 98 if ((ret = (*set)(netdev, new)) == 0)
99 ret = len; 99 ret = len;
100 } 100 }
101 rtnl_unlock(); 101 rtnl_unlock();
102 err: 102 err:
103 return ret; 103 return ret;
104 } 104 }
105 105
106 NETDEVICE_SHOW_RO(dev_id, fmt_hex); 106 NETDEVICE_SHOW_RO(dev_id, fmt_hex);
107 NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec); 107 NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec);
108 NETDEVICE_SHOW_RO(addr_len, fmt_dec); 108 NETDEVICE_SHOW_RO(addr_len, fmt_dec);
109 NETDEVICE_SHOW_RO(iflink, fmt_dec); 109 NETDEVICE_SHOW_RO(iflink, fmt_dec);
110 NETDEVICE_SHOW_RO(ifindex, fmt_dec); 110 NETDEVICE_SHOW_RO(ifindex, fmt_dec);
111 NETDEVICE_SHOW_RO(type, fmt_dec); 111 NETDEVICE_SHOW_RO(type, fmt_dec);
112 NETDEVICE_SHOW_RO(link_mode, fmt_dec); 112 NETDEVICE_SHOW_RO(link_mode, fmt_dec);
113 113
114 /* use same locking rules as GIFHWADDR ioctl's */ 114 /* use same locking rules as GIFHWADDR ioctl's */
115 static ssize_t address_show(struct device *dev, struct device_attribute *attr, 115 static ssize_t address_show(struct device *dev, struct device_attribute *attr,
116 char *buf) 116 char *buf)
117 { 117 {
118 struct net_device *net = to_net_dev(dev); 118 struct net_device *net = to_net_dev(dev);
119 ssize_t ret = -EINVAL; 119 ssize_t ret = -EINVAL;
120 120
121 read_lock(&dev_base_lock); 121 read_lock(&dev_base_lock);
122 if (dev_isalive(net)) 122 if (dev_isalive(net))
123 ret = sysfs_format_mac(buf, net->dev_addr, net->addr_len); 123 ret = sysfs_format_mac(buf, net->dev_addr, net->addr_len);
124 read_unlock(&dev_base_lock); 124 read_unlock(&dev_base_lock);
125 return ret; 125 return ret;
126 } 126 }
127 static DEVICE_ATTR_RO(address); 127 static DEVICE_ATTR_RO(address);
128 128
129 static ssize_t broadcast_show(struct device *dev, 129 static ssize_t broadcast_show(struct device *dev,
130 struct device_attribute *attr, char *buf) 130 struct device_attribute *attr, char *buf)
131 { 131 {
132 struct net_device *net = to_net_dev(dev); 132 struct net_device *net = to_net_dev(dev);
133 if (dev_isalive(net)) 133 if (dev_isalive(net))
134 return sysfs_format_mac(buf, net->broadcast, net->addr_len); 134 return sysfs_format_mac(buf, net->broadcast, net->addr_len);
135 return -EINVAL; 135 return -EINVAL;
136 } 136 }
137 static DEVICE_ATTR_RO(broadcast); 137 static DEVICE_ATTR_RO(broadcast);
138 138
139 static int change_carrier(struct net_device *net, unsigned long new_carrier) 139 static int change_carrier(struct net_device *net, unsigned long new_carrier)
140 { 140 {
141 if (!netif_running(net)) 141 if (!netif_running(net))
142 return -EINVAL; 142 return -EINVAL;
143 return dev_change_carrier(net, (bool) new_carrier); 143 return dev_change_carrier(net, (bool) new_carrier);
144 } 144 }
145 145
146 static ssize_t carrier_store(struct device *dev, struct device_attribute *attr, 146 static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
147 const char *buf, size_t len) 147 const char *buf, size_t len)
148 { 148 {
149 return netdev_store(dev, attr, buf, len, change_carrier); 149 return netdev_store(dev, attr, buf, len, change_carrier);
150 } 150 }
151 151
152 static ssize_t carrier_show(struct device *dev, 152 static ssize_t carrier_show(struct device *dev,
153 struct device_attribute *attr, char *buf) 153 struct device_attribute *attr, char *buf)
154 { 154 {
155 struct net_device *netdev = to_net_dev(dev); 155 struct net_device *netdev = to_net_dev(dev);
156 if (netif_running(netdev)) { 156 if (netif_running(netdev)) {
157 return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev)); 157 return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev));
158 } 158 }
159 return -EINVAL; 159 return -EINVAL;
160 } 160 }
161 static DEVICE_ATTR_RW(carrier); 161 static DEVICE_ATTR_RW(carrier);
162 162
163 static ssize_t speed_show(struct device *dev, 163 static ssize_t speed_show(struct device *dev,
164 struct device_attribute *attr, char *buf) 164 struct device_attribute *attr, char *buf)
165 { 165 {
166 struct net_device *netdev = to_net_dev(dev); 166 struct net_device *netdev = to_net_dev(dev);
167 int ret = -EINVAL; 167 int ret = -EINVAL;
168 168
169 if (!rtnl_trylock()) 169 if (!rtnl_trylock())
170 return restart_syscall(); 170 return restart_syscall();
171 171
172 if (netif_running(netdev)) { 172 if (netif_running(netdev)) {
173 struct ethtool_cmd cmd; 173 struct ethtool_cmd cmd;
174 if (!__ethtool_get_settings(netdev, &cmd)) 174 if (!__ethtool_get_settings(netdev, &cmd))
175 ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd)); 175 ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd));
176 } 176 }
177 rtnl_unlock(); 177 rtnl_unlock();
178 return ret; 178 return ret;
179 } 179 }
180 static DEVICE_ATTR_RO(speed); 180 static DEVICE_ATTR_RO(speed);
181 181
182 static ssize_t duplex_show(struct device *dev, 182 static ssize_t duplex_show(struct device *dev,
183 struct device_attribute *attr, char *buf) 183 struct device_attribute *attr, char *buf)
184 { 184 {
185 struct net_device *netdev = to_net_dev(dev); 185 struct net_device *netdev = to_net_dev(dev);
186 int ret = -EINVAL; 186 int ret = -EINVAL;
187 187
188 if (!rtnl_trylock()) 188 if (!rtnl_trylock())
189 return restart_syscall(); 189 return restart_syscall();
190 190
191 if (netif_running(netdev)) { 191 if (netif_running(netdev)) {
192 struct ethtool_cmd cmd; 192 struct ethtool_cmd cmd;
193 if (!__ethtool_get_settings(netdev, &cmd)) { 193 if (!__ethtool_get_settings(netdev, &cmd)) {
194 const char *duplex; 194 const char *duplex;
195 switch (cmd.duplex) { 195 switch (cmd.duplex) {
196 case DUPLEX_HALF: 196 case DUPLEX_HALF:
197 duplex = "half"; 197 duplex = "half";
198 break; 198 break;
199 case DUPLEX_FULL: 199 case DUPLEX_FULL:
200 duplex = "full"; 200 duplex = "full";
201 break; 201 break;
202 default: 202 default:
203 duplex = "unknown"; 203 duplex = "unknown";
204 break; 204 break;
205 } 205 }
206 ret = sprintf(buf, "%s\n", duplex); 206 ret = sprintf(buf, "%s\n", duplex);
207 } 207 }
208 } 208 }
209 rtnl_unlock(); 209 rtnl_unlock();
210 return ret; 210 return ret;
211 } 211 }
212 static DEVICE_ATTR_RO(duplex); 212 static DEVICE_ATTR_RO(duplex);
213 213
214 static ssize_t dormant_show(struct device *dev, 214 static ssize_t dormant_show(struct device *dev,
215 struct device_attribute *attr, char *buf) 215 struct device_attribute *attr, char *buf)
216 { 216 {
217 struct net_device *netdev = to_net_dev(dev); 217 struct net_device *netdev = to_net_dev(dev);
218 218
219 if (netif_running(netdev)) 219 if (netif_running(netdev))
220 return sprintf(buf, fmt_dec, !!netif_dormant(netdev)); 220 return sprintf(buf, fmt_dec, !!netif_dormant(netdev));
221 221
222 return -EINVAL; 222 return -EINVAL;
223 } 223 }
224 static DEVICE_ATTR_RO(dormant); 224 static DEVICE_ATTR_RO(dormant);
225 225
226 static const char *const operstates[] = { 226 static const char *const operstates[] = {
227 "unknown", 227 "unknown",
228 "notpresent", /* currently unused */ 228 "notpresent", /* currently unused */
229 "down", 229 "down",
230 "lowerlayerdown", 230 "lowerlayerdown",
231 "testing", /* currently unused */ 231 "testing", /* currently unused */
232 "dormant", 232 "dormant",
233 "up" 233 "up"
234 }; 234 };
235 235
236 static ssize_t operstate_show(struct device *dev, 236 static ssize_t operstate_show(struct device *dev,
237 struct device_attribute *attr, char *buf) 237 struct device_attribute *attr, char *buf)
238 { 238 {
239 const struct net_device *netdev = to_net_dev(dev); 239 const struct net_device *netdev = to_net_dev(dev);
240 unsigned char operstate; 240 unsigned char operstate;
241 241
242 read_lock(&dev_base_lock); 242 read_lock(&dev_base_lock);
243 operstate = netdev->operstate; 243 operstate = netdev->operstate;
244 if (!netif_running(netdev)) 244 if (!netif_running(netdev))
245 operstate = IF_OPER_DOWN; 245 operstate = IF_OPER_DOWN;
246 read_unlock(&dev_base_lock); 246 read_unlock(&dev_base_lock);
247 247
248 if (operstate >= ARRAY_SIZE(operstates)) 248 if (operstate >= ARRAY_SIZE(operstates))
249 return -EINVAL; /* should not happen */ 249 return -EINVAL; /* should not happen */
250 250
251 return sprintf(buf, "%s\n", operstates[operstate]); 251 return sprintf(buf, "%s\n", operstates[operstate]);
252 } 252 }
253 static DEVICE_ATTR_RO(operstate); 253 static DEVICE_ATTR_RO(operstate);
254 254
255 /* read-write attributes */ 255 /* read-write attributes */
256 256
257 static int change_mtu(struct net_device *net, unsigned long new_mtu) 257 static int change_mtu(struct net_device *net, unsigned long new_mtu)
258 { 258 {
259 return dev_set_mtu(net, (int) new_mtu); 259 return dev_set_mtu(net, (int) new_mtu);
260 } 260 }
261 261
262 static ssize_t mtu_store(struct device *dev, struct device_attribute *attr, 262 static ssize_t mtu_store(struct device *dev, struct device_attribute *attr,
263 const char *buf, size_t len) 263 const char *buf, size_t len)
264 { 264 {
265 return netdev_store(dev, attr, buf, len, change_mtu); 265 return netdev_store(dev, attr, buf, len, change_mtu);
266 } 266 }
267 NETDEVICE_SHOW_RW(mtu, fmt_dec); 267 NETDEVICE_SHOW_RW(mtu, fmt_dec);
268 268
269 static int change_flags(struct net_device *net, unsigned long new_flags) 269 static int change_flags(struct net_device *net, unsigned long new_flags)
270 { 270 {
271 return dev_change_flags(net, (unsigned int) new_flags); 271 return dev_change_flags(net, (unsigned int) new_flags);
272 } 272 }
273 273
274 static ssize_t flags_store(struct device *dev, struct device_attribute *attr, 274 static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
275 const char *buf, size_t len) 275 const char *buf, size_t len)
276 { 276 {
277 return netdev_store(dev, attr, buf, len, change_flags); 277 return netdev_store(dev, attr, buf, len, change_flags);
278 } 278 }
279 NETDEVICE_SHOW_RW(flags, fmt_hex); 279 NETDEVICE_SHOW_RW(flags, fmt_hex);
280 280
281 static int change_tx_queue_len(struct net_device *net, unsigned long new_len) 281 static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
282 { 282 {
283 net->tx_queue_len = new_len; 283 net->tx_queue_len = new_len;
284 return 0; 284 return 0;
285 } 285 }
286 286
287 static ssize_t tx_queue_len_store(struct device *dev, 287 static ssize_t tx_queue_len_store(struct device *dev,
288 struct device_attribute *attr, 288 struct device_attribute *attr,
289 const char *buf, size_t len) 289 const char *buf, size_t len)
290 { 290 {
291 if (!capable(CAP_NET_ADMIN)) 291 if (!capable(CAP_NET_ADMIN))
292 return -EPERM; 292 return -EPERM;
293 293
294 return netdev_store(dev, attr, buf, len, change_tx_queue_len); 294 return netdev_store(dev, attr, buf, len, change_tx_queue_len);
295 } 295 }
296 NETDEVICE_SHOW_RW(tx_queue_len, fmt_ulong); 296 NETDEVICE_SHOW_RW(tx_queue_len, fmt_ulong);
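tx_queue_len, like mtu and flags above, is writable through netdev_store(), which checks CAP_NET_ADMIN and parses the value with kstrtoul(). From user space that is simply a write to the sysfs file; a minimal sketch, where the device name and the value are illustrative:

/* Write a new tx_queue_len through the store hook above.
 * Needs CAP_NET_ADMIN; "eth0" and the value 2000 are examples.
 */
#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/sys/class/net/eth0/tx_queue_len", "w");

        if (!f)
                return 1;
        fprintf(f, "%d\n", 2000);  /* parsed by kstrtoul() in netdev_store() */
        fclose(f);
        return 0;
}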

static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
                             const char *buf, size_t len)
{
        struct net_device *netdev = to_net_dev(dev);
        struct net *net = dev_net(netdev);
        size_t count = len;
        ssize_t ret;

        if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                return -EPERM;

        /* ignore trailing newline */
        if (len > 0 && buf[len - 1] == '\n')
                --count;

        if (!rtnl_trylock())
                return restart_syscall();
        ret = dev_set_alias(netdev, buf, count);
        rtnl_unlock();

        return ret < 0 ? ret : len;
}

static ssize_t ifalias_show(struct device *dev,
                            struct device_attribute *attr, char *buf)
{
        const struct net_device *netdev = to_net_dev(dev);
        ssize_t ret = 0;

        if (!rtnl_trylock())
                return restart_syscall();
        if (netdev->ifalias)
                ret = sprintf(buf, "%s\n", netdev->ifalias);
        rtnl_unlock();
        return ret;
}
static DEVICE_ATTR_RW(ifalias);

static int change_group(struct net_device *net, unsigned long new_group)
{
        dev_set_group(net, (int) new_group);
        return 0;
}

static ssize_t group_store(struct device *dev, struct device_attribute *attr,
                           const char *buf, size_t len)
{
        return netdev_store(dev, attr, buf, len, change_group);
}
NETDEVICE_SHOW(group, fmt_dec);
static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store);

static ssize_t phys_port_id_show(struct device *dev,
                                 struct device_attribute *attr, char *buf)
{
        struct net_device *netdev = to_net_dev(dev);
        ssize_t ret = -EINVAL;

        if (!rtnl_trylock())
                return restart_syscall();

        if (dev_isalive(netdev)) {
                struct netdev_phys_port_id ppid;

                ret = dev_get_phys_port_id(netdev, &ppid);
                if (!ret)
                        ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id);
        }
        rtnl_unlock();

        return ret;
}
static DEVICE_ATTR_RO(phys_port_id);

static struct attribute *net_class_attrs[] = {
        &dev_attr_netdev_group.attr,
        &dev_attr_type.attr,
        &dev_attr_dev_id.attr,
        &dev_attr_iflink.attr,
        &dev_attr_ifindex.attr,
        &dev_attr_addr_assign_type.attr,
        &dev_attr_addr_len.attr,
        &dev_attr_link_mode.attr,
        &dev_attr_address.attr,
        &dev_attr_broadcast.attr,
        &dev_attr_speed.attr,
        &dev_attr_duplex.attr,
        &dev_attr_dormant.attr,
        &dev_attr_operstate.attr,
        &dev_attr_ifalias.attr,
        &dev_attr_carrier.attr,
        &dev_attr_mtu.attr,
        &dev_attr_flags.attr,
        &dev_attr_tx_queue_len.attr,
        &dev_attr_phys_port_id.attr,
        NULL,
};
ATTRIBUTE_GROUPS(net_class);

/* Show a given an attribute in the statistics group */
static ssize_t netstat_show(const struct device *d,
                            struct device_attribute *attr, char *buf,
                            unsigned long offset)
{
        struct net_device *dev = to_net_dev(d);
        ssize_t ret = -EINVAL;

        WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
                offset % sizeof(u64) != 0);

        read_lock(&dev_base_lock);
        if (dev_isalive(dev)) {
                struct rtnl_link_stats64 temp;
                const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);

                ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset));
        }
        read_unlock(&dev_base_lock);
        return ret;
}

/* generate a read-only statistics attribute */
#define NETSTAT_ENTRY(name) \
static ssize_t name##_show(struct device *d, \
                           struct device_attribute *attr, char *buf) \
{ \
        return netstat_show(d, attr, buf, \
                            offsetof(struct rtnl_link_stats64, name)); \
} \
static DEVICE_ATTR_RO(name)

NETSTAT_ENTRY(rx_packets);
NETSTAT_ENTRY(tx_packets);
NETSTAT_ENTRY(rx_bytes);
NETSTAT_ENTRY(tx_bytes);
NETSTAT_ENTRY(rx_errors);
NETSTAT_ENTRY(tx_errors);
NETSTAT_ENTRY(rx_dropped);
NETSTAT_ENTRY(tx_dropped);
NETSTAT_ENTRY(multicast);
NETSTAT_ENTRY(collisions);
NETSTAT_ENTRY(rx_length_errors);
NETSTAT_ENTRY(rx_over_errors);
NETSTAT_ENTRY(rx_crc_errors);
NETSTAT_ENTRY(rx_frame_errors);
NETSTAT_ENTRY(rx_fifo_errors);
NETSTAT_ENTRY(rx_missed_errors);
NETSTAT_ENTRY(tx_aborted_errors);
NETSTAT_ENTRY(tx_carrier_errors);
NETSTAT_ENTRY(tx_fifo_errors);
NETSTAT_ENTRY(tx_heartbeat_errors);
NETSTAT_ENTRY(tx_window_errors);
NETSTAT_ENTRY(rx_compressed);
NETSTAT_ENTRY(tx_compressed);

static struct attribute *netstat_attrs[] = {
        &dev_attr_rx_packets.attr,
        &dev_attr_tx_packets.attr,
        &dev_attr_rx_bytes.attr,
        &dev_attr_tx_bytes.attr,
        &dev_attr_rx_errors.attr,
        &dev_attr_tx_errors.attr,
        &dev_attr_rx_dropped.attr,
        &dev_attr_tx_dropped.attr,
        &dev_attr_multicast.attr,
        &dev_attr_collisions.attr,
        &dev_attr_rx_length_errors.attr,
        &dev_attr_rx_over_errors.attr,
        &dev_attr_rx_crc_errors.attr,
        &dev_attr_rx_frame_errors.attr,
        &dev_attr_rx_fifo_errors.attr,
        &dev_attr_rx_missed_errors.attr,
        &dev_attr_tx_aborted_errors.attr,
        &dev_attr_tx_carrier_errors.attr,
        &dev_attr_tx_fifo_errors.attr,
        &dev_attr_tx_heartbeat_errors.attr,
        &dev_attr_tx_window_errors.attr,
        &dev_attr_rx_compressed.attr,
        &dev_attr_tx_compressed.attr,
        NULL
};


static struct attribute_group netstat_group = {
        .name  = "statistics",
        .attrs  = netstat_attrs,
};
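Each NETSTAT_ENTRY above becomes a read-only file under /sys/class/net/<dev>/statistics/ that prints one 64-bit counter from rtnl_link_stats64. A minimal user-space reader, where the device name is an example:

/* Read one of the statistics attributes registered above.
 * "eth0" and the rx_packets attribute are illustrative choices.
 */
#include <stdio.h>

int main(void)
{
        unsigned long long rx_packets;
        FILE *f = fopen("/sys/class/net/eth0/statistics/rx_packets", "r");

        if (!f)
                return 1;
        if (fscanf(f, "%llu", &rx_packets) == 1)
                printf("rx_packets: %llu\n", rx_packets);
        fclose(f);
        return 0;
}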

#if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211)
static struct attribute *wireless_attrs[] = {
        NULL
};

static struct attribute_group wireless_group = {
        .name = "wireless",
        .attrs = wireless_attrs,
};
#endif

#else /* CONFIG_SYSFS */
#define net_class_groups        NULL
#endif /* CONFIG_SYSFS */

#ifdef CONFIG_RPS
/*
 * RX queue sysfs structures and functions.
 */
struct rx_queue_attribute {
        struct attribute attr;
        ssize_t (*show)(struct netdev_rx_queue *queue,
            struct rx_queue_attribute *attr, char *buf);
        ssize_t (*store)(struct netdev_rx_queue *queue,
            struct rx_queue_attribute *attr, const char *buf, size_t len);
};
#define to_rx_queue_attr(_attr) container_of(_attr, \
    struct rx_queue_attribute, attr)

#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)

static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
                                  char *buf)
{
        struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
        struct netdev_rx_queue *queue = to_rx_queue(kobj);

        if (!attribute->show)
                return -EIO;

        return attribute->show(queue, attribute, buf);
}

static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
                                   const char *buf, size_t count)
{
        struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
        struct netdev_rx_queue *queue = to_rx_queue(kobj);

        if (!attribute->store)
                return -EIO;

        return attribute->store(queue, attribute, buf, count);
}

static const struct sysfs_ops rx_queue_sysfs_ops = {
        .show = rx_queue_attr_show,
        .store = rx_queue_attr_store,
};

static ssize_t show_rps_map(struct netdev_rx_queue *queue,
                            struct rx_queue_attribute *attribute, char *buf)
{
        struct rps_map *map;
        cpumask_var_t mask;
        size_t len = 0;
        int i;

        if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
                return -ENOMEM;

        rcu_read_lock();
        map = rcu_dereference(queue->rps_map);
        if (map)
                for (i = 0; i < map->len; i++)
                        cpumask_set_cpu(map->cpus[i], mask);

        len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
        if (PAGE_SIZE - len < 3) {
                rcu_read_unlock();
                free_cpumask_var(mask);
                return -EINVAL;
        }
        rcu_read_unlock();

        free_cpumask_var(mask);
        len += sprintf(buf + len, "\n");
573 return len; 573 return len;
574 } 574 }
575 575
576 static ssize_t store_rps_map(struct netdev_rx_queue *queue, 576 static ssize_t store_rps_map(struct netdev_rx_queue *queue,
577 struct rx_queue_attribute *attribute, 577 struct rx_queue_attribute *attribute,
578 const char *buf, size_t len) 578 const char *buf, size_t len)
579 { 579 {
580 struct rps_map *old_map, *map; 580 struct rps_map *old_map, *map;
581 cpumask_var_t mask; 581 cpumask_var_t mask;
582 int err, cpu, i; 582 int err, cpu, i;
583 static DEFINE_SPINLOCK(rps_map_lock); 583 static DEFINE_SPINLOCK(rps_map_lock);
584 584
585 if (!capable(CAP_NET_ADMIN)) 585 if (!capable(CAP_NET_ADMIN))
586 return -EPERM; 586 return -EPERM;
587 587
588 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 588 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
589 return -ENOMEM; 589 return -ENOMEM;
590 590
591 err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); 591 err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
592 if (err) { 592 if (err) {
593 free_cpumask_var(mask); 593 free_cpumask_var(mask);
594 return err; 594 return err;
595 } 595 }
596 596
597 map = kzalloc(max_t(unsigned int, 597 map = kzalloc(max_t(unsigned int,
598 RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES), 598 RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
599 GFP_KERNEL); 599 GFP_KERNEL);
600 if (!map) { 600 if (!map) {
601 free_cpumask_var(mask); 601 free_cpumask_var(mask);
602 return -ENOMEM; 602 return -ENOMEM;
603 } 603 }
604 604
605 i = 0; 605 i = 0;
606 for_each_cpu_and(cpu, mask, cpu_online_mask) 606 for_each_cpu_and(cpu, mask, cpu_online_mask)
607 map->cpus[i++] = cpu; 607 map->cpus[i++] = cpu;
608 608
609 if (i) 609 if (i)
610 map->len = i; 610 map->len = i;
611 else { 611 else {
612 kfree(map); 612 kfree(map);
613 map = NULL; 613 map = NULL;
614 } 614 }
615 615
616 spin_lock(&rps_map_lock); 616 spin_lock(&rps_map_lock);
617 old_map = rcu_dereference_protected(queue->rps_map, 617 old_map = rcu_dereference_protected(queue->rps_map,
618 lockdep_is_held(&rps_map_lock)); 618 lockdep_is_held(&rps_map_lock));
619 rcu_assign_pointer(queue->rps_map, map); 619 rcu_assign_pointer(queue->rps_map, map);
620 spin_unlock(&rps_map_lock); 620 spin_unlock(&rps_map_lock);
621 621
622 if (map) 622 if (map)
623 static_key_slow_inc(&rps_needed); 623 static_key_slow_inc(&rps_needed);
624 if (old_map) { 624 if (old_map) {
625 kfree_rcu(old_map, rcu); 625 kfree_rcu(old_map, rcu);
626 static_key_slow_dec(&rps_needed); 626 static_key_slow_dec(&rps_needed);
627 } 627 }
628 free_cpumask_var(mask); 628 free_cpumask_var(mask);
629 return len; 629 return len;
630 } 630 }
631 631
632 static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, 632 static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
633 struct rx_queue_attribute *attr, 633 struct rx_queue_attribute *attr,
634 char *buf) 634 char *buf)
635 { 635 {
636 struct rps_dev_flow_table *flow_table; 636 struct rps_dev_flow_table *flow_table;
637 unsigned long val = 0; 637 unsigned long val = 0;
638 638
639 rcu_read_lock(); 639 rcu_read_lock();
640 flow_table = rcu_dereference(queue->rps_flow_table); 640 flow_table = rcu_dereference(queue->rps_flow_table);
641 if (flow_table) 641 if (flow_table)
642 val = (unsigned long)flow_table->mask + 1; 642 val = (unsigned long)flow_table->mask + 1;
643 rcu_read_unlock(); 643 rcu_read_unlock();
644 644
645 return sprintf(buf, "%lu\n", val); 645 return sprintf(buf, "%lu\n", val);
646 } 646 }
647 647
648 static void rps_dev_flow_table_release(struct rcu_head *rcu) 648 static void rps_dev_flow_table_release(struct rcu_head *rcu)
649 { 649 {
650 struct rps_dev_flow_table *table = container_of(rcu, 650 struct rps_dev_flow_table *table = container_of(rcu,
651 struct rps_dev_flow_table, rcu); 651 struct rps_dev_flow_table, rcu);
652 vfree(table); 652 vfree(table);
653 } 653 }
654 654
655 static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, 655 static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
656 struct rx_queue_attribute *attr, 656 struct rx_queue_attribute *attr,
657 const char *buf, size_t len) 657 const char *buf, size_t len)
658 { 658 {
659 unsigned long mask, count; 659 unsigned long mask, count;
660 struct rps_dev_flow_table *table, *old_table; 660 struct rps_dev_flow_table *table, *old_table;
661 static DEFINE_SPINLOCK(rps_dev_flow_lock); 661 static DEFINE_SPINLOCK(rps_dev_flow_lock);
662 int rc; 662 int rc;
663 663
664 if (!capable(CAP_NET_ADMIN)) 664 if (!capable(CAP_NET_ADMIN))
665 return -EPERM; 665 return -EPERM;
666 666
667 rc = kstrtoul(buf, 0, &count); 667 rc = kstrtoul(buf, 0, &count);
668 if (rc < 0) 668 if (rc < 0)
669 return rc; 669 return rc;
670 670
671 if (count) { 671 if (count) {
672 mask = count - 1; 672 mask = count - 1;
673 /* mask = roundup_pow_of_two(count) - 1; 673 /* mask = roundup_pow_of_two(count) - 1;
674 * without overflows... 674 * without overflows...
675 */ 675 */
676 while ((mask | (mask >> 1)) != mask) 676 while ((mask | (mask >> 1)) != mask)
677 mask |= (mask >> 1); 677 mask |= (mask >> 1);
678 /* On 64 bit arches, must check mask fits in table->mask (u32), 678 /* On 64 bit arches, must check mask fits in table->mask (u32),
679 * and on 32bit arches, must check RPS_DEV_FLOW_TABLE_SIZE(mask + 1) 679 * and on 32bit arches, must check
680 * doesnt overflow. 680 * RPS_DEV_FLOW_TABLE_SIZE(mask + 1) doesn't overflow.
681 */ 681 */
682 #if BITS_PER_LONG > 32 682 #if BITS_PER_LONG > 32
683 if (mask > (unsigned long)(u32)mask) 683 if (mask > (unsigned long)(u32)mask)
684 return -EINVAL; 684 return -EINVAL;
685 #else 685 #else
686 if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1)) 686 if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1))
687 / sizeof(struct rps_dev_flow)) { 687 / sizeof(struct rps_dev_flow)) {
688 /* Enforce a limit to prevent overflow */ 688 /* Enforce a limit to prevent overflow */
689 return -EINVAL; 689 return -EINVAL;
690 } 690 }
691 #endif 691 #endif
692 table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1)); 692 table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1));
693 if (!table) 693 if (!table)
694 return -ENOMEM; 694 return -ENOMEM;
695 695
696 table->mask = mask; 696 table->mask = mask;
697 for (count = 0; count <= mask; count++) 697 for (count = 0; count <= mask; count++)
698 table->flows[count].cpu = RPS_NO_CPU; 698 table->flows[count].cpu = RPS_NO_CPU;
699 } else 699 } else
700 table = NULL; 700 table = NULL;
701 701
702 spin_lock(&rps_dev_flow_lock); 702 spin_lock(&rps_dev_flow_lock);
703 old_table = rcu_dereference_protected(queue->rps_flow_table, 703 old_table = rcu_dereference_protected(queue->rps_flow_table,
704 lockdep_is_held(&rps_dev_flow_lock)); 704 lockdep_is_held(&rps_dev_flow_lock));
705 rcu_assign_pointer(queue->rps_flow_table, table); 705 rcu_assign_pointer(queue->rps_flow_table, table);
706 spin_unlock(&rps_dev_flow_lock); 706 spin_unlock(&rps_dev_flow_lock);
707 707
708 if (old_table) 708 if (old_table)
709 call_rcu(&old_table->rcu, rps_dev_flow_table_release); 709 call_rcu(&old_table->rcu, rps_dev_flow_table_release);
710 710
711 return len; 711 return len;
712 } 712 }
713 713
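The loop in store_rps_dev_flow_table_cnt() above rounds the requested flow count up to a power of two by repeatedly OR-ing mask with mask >> 1, which avoids the overflow that a plain roundup_pow_of_two(count) could hit for very large counts. A minimal standalone sketch of the same bit trick, using a hypothetical round_mask() helper that is not part of the kernel source:

#include <stdio.h>

/* Mirrors the loop in store_rps_dev_flow_table_cnt(): given a requested
 * table size, return a mask of the form 2^k - 1 that is at least count - 1,
 * without rounding count itself (which could overflow).
 */
static unsigned long round_mask(unsigned long count)
{
        unsigned long mask = count - 1;

        /* OR in shifted copies until every bit below the top set bit is set */
        while ((mask | (mask >> 1)) != mask)
                mask |= (mask >> 1);

        return mask;
}

int main(void)
{
        unsigned long sizes[] = { 1, 5, 100, 4096, 5000 };
        unsigned long i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
                unsigned long mask = round_mask(sizes[i]);

                printf("count %5lu -> mask %5lu (table size %lu)\n",
                       sizes[i], mask, mask + 1);
        }
        return 0;
}

Writing, say, 5000 to rps_flow_cnt therefore ends up allocating a table of 8192 entries (mask 8191).
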
714 static struct rx_queue_attribute rps_cpus_attribute = 714 static struct rx_queue_attribute rps_cpus_attribute =
715 __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map); 715 __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
716 716
717 717
718 static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute = 718 static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute =
719 __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR, 719 __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
720 show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt); 720 show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
721 721
722 static struct attribute *rx_queue_default_attrs[] = { 722 static struct attribute *rx_queue_default_attrs[] = {
723 &rps_cpus_attribute.attr, 723 &rps_cpus_attribute.attr,
724 &rps_dev_flow_table_cnt_attribute.attr, 724 &rps_dev_flow_table_cnt_attribute.attr,
725 NULL 725 NULL
726 }; 726 };
727 727
728 static void rx_queue_release(struct kobject *kobj) 728 static void rx_queue_release(struct kobject *kobj)
729 { 729 {
730 struct netdev_rx_queue *queue = to_rx_queue(kobj); 730 struct netdev_rx_queue *queue = to_rx_queue(kobj);
731 struct rps_map *map; 731 struct rps_map *map;
732 struct rps_dev_flow_table *flow_table; 732 struct rps_dev_flow_table *flow_table;
733 733
734 734
735 map = rcu_dereference_protected(queue->rps_map, 1); 735 map = rcu_dereference_protected(queue->rps_map, 1);
736 if (map) { 736 if (map) {
737 RCU_INIT_POINTER(queue->rps_map, NULL); 737 RCU_INIT_POINTER(queue->rps_map, NULL);
738 kfree_rcu(map, rcu); 738 kfree_rcu(map, rcu);
739 } 739 }
740 740
741 flow_table = rcu_dereference_protected(queue->rps_flow_table, 1); 741 flow_table = rcu_dereference_protected(queue->rps_flow_table, 1);
742 if (flow_table) { 742 if (flow_table) {
743 RCU_INIT_POINTER(queue->rps_flow_table, NULL); 743 RCU_INIT_POINTER(queue->rps_flow_table, NULL);
744 call_rcu(&flow_table->rcu, rps_dev_flow_table_release); 744 call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
745 } 745 }
746 746
747 memset(kobj, 0, sizeof(*kobj)); 747 memset(kobj, 0, sizeof(*kobj));
748 dev_put(queue->dev); 748 dev_put(queue->dev);
749 } 749 }
750 750
751 static struct kobj_type rx_queue_ktype = { 751 static struct kobj_type rx_queue_ktype = {
752 .sysfs_ops = &rx_queue_sysfs_ops, 752 .sysfs_ops = &rx_queue_sysfs_ops,
753 .release = rx_queue_release, 753 .release = rx_queue_release,
754 .default_attrs = rx_queue_default_attrs, 754 .default_attrs = rx_queue_default_attrs,
755 }; 755 };
756 756
757 static int rx_queue_add_kobject(struct net_device *net, int index) 757 static int rx_queue_add_kobject(struct net_device *net, int index)
758 { 758 {
759 struct netdev_rx_queue *queue = net->_rx + index; 759 struct netdev_rx_queue *queue = net->_rx + index;
760 struct kobject *kobj = &queue->kobj; 760 struct kobject *kobj = &queue->kobj;
761 int error = 0; 761 int error = 0;
762 762
763 kobj->kset = net->queues_kset; 763 kobj->kset = net->queues_kset;
764 error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, 764 error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
765 "rx-%u", index); 765 "rx-%u", index);
766 if (error) { 766 if (error) {
767 kobject_put(kobj); 767 kobject_put(kobj);
768 return error; 768 return error;
769 } 769 }
770 770
771 kobject_uevent(kobj, KOBJ_ADD); 771 kobject_uevent(kobj, KOBJ_ADD);
772 dev_hold(queue->dev); 772 dev_hold(queue->dev);
773 773
774 return error; 774 return error;
775 } 775 }
776 #endif /* CONFIG_RPS */ 776 #endif /* CONFIG_RPS */
777 777
778 int 778 int
779 net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) 779 net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
780 { 780 {
781 #ifdef CONFIG_RPS 781 #ifdef CONFIG_RPS
782 int i; 782 int i;
783 int error = 0; 783 int error = 0;
784 784
785 for (i = old_num; i < new_num; i++) { 785 for (i = old_num; i < new_num; i++) {
786 error = rx_queue_add_kobject(net, i); 786 error = rx_queue_add_kobject(net, i);
787 if (error) { 787 if (error) {
788 new_num = old_num; 788 new_num = old_num;
789 break; 789 break;
790 } 790 }
791 } 791 }
792 792
793 while (--i >= new_num) 793 while (--i >= new_num)
794 kobject_put(&net->_rx[i].kobj); 794 kobject_put(&net->_rx[i].kobj);
795 795
796 return error; 796 return error;
797 #else 797 #else
798 return 0; 798 return 0;
799 #endif 799 #endif
800 } 800 }
801 801
802 #ifdef CONFIG_SYSFS 802 #ifdef CONFIG_SYSFS
803 /* 803 /*
804 * netdev_queue sysfs structures and functions. 804 * netdev_queue sysfs structures and functions.
805 */ 805 */
806 struct netdev_queue_attribute { 806 struct netdev_queue_attribute {
807 struct attribute attr; 807 struct attribute attr;
808 ssize_t (*show)(struct netdev_queue *queue, 808 ssize_t (*show)(struct netdev_queue *queue,
809 struct netdev_queue_attribute *attr, char *buf); 809 struct netdev_queue_attribute *attr, char *buf);
810 ssize_t (*store)(struct netdev_queue *queue, 810 ssize_t (*store)(struct netdev_queue *queue,
811 struct netdev_queue_attribute *attr, const char *buf, size_t len); 811 struct netdev_queue_attribute *attr, const char *buf, size_t len);
812 }; 812 };
813 #define to_netdev_queue_attr(_attr) container_of(_attr, \ 813 #define to_netdev_queue_attr(_attr) container_of(_attr, \
814 struct netdev_queue_attribute, attr) 814 struct netdev_queue_attribute, attr)
815 815
816 #define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj) 816 #define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)
817 817
818 static ssize_t netdev_queue_attr_show(struct kobject *kobj, 818 static ssize_t netdev_queue_attr_show(struct kobject *kobj,
819 struct attribute *attr, char *buf) 819 struct attribute *attr, char *buf)
820 { 820 {
821 struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); 821 struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
822 struct netdev_queue *queue = to_netdev_queue(kobj); 822 struct netdev_queue *queue = to_netdev_queue(kobj);
823 823
824 if (!attribute->show) 824 if (!attribute->show)
825 return -EIO; 825 return -EIO;
826 826
827 return attribute->show(queue, attribute, buf); 827 return attribute->show(queue, attribute, buf);
828 } 828 }
829 829
830 static ssize_t netdev_queue_attr_store(struct kobject *kobj, 830 static ssize_t netdev_queue_attr_store(struct kobject *kobj,
831 struct attribute *attr, 831 struct attribute *attr,
832 const char *buf, size_t count) 832 const char *buf, size_t count)
833 { 833 {
834 struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); 834 struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
835 struct netdev_queue *queue = to_netdev_queue(kobj); 835 struct netdev_queue *queue = to_netdev_queue(kobj);
836 836
837 if (!attribute->store) 837 if (!attribute->store)
838 return -EIO; 838 return -EIO;
839 839
840 return attribute->store(queue, attribute, buf, count); 840 return attribute->store(queue, attribute, buf, count);
841 } 841 }
842 842
843 static const struct sysfs_ops netdev_queue_sysfs_ops = { 843 static const struct sysfs_ops netdev_queue_sysfs_ops = {
844 .show = netdev_queue_attr_show, 844 .show = netdev_queue_attr_show,
845 .store = netdev_queue_attr_store, 845 .store = netdev_queue_attr_store,
846 }; 846 };
847 847
848 static ssize_t show_trans_timeout(struct netdev_queue *queue, 848 static ssize_t show_trans_timeout(struct netdev_queue *queue,
849 struct netdev_queue_attribute *attribute, 849 struct netdev_queue_attribute *attribute,
850 char *buf) 850 char *buf)
851 { 851 {
852 unsigned long trans_timeout; 852 unsigned long trans_timeout;
853 853
854 spin_lock_irq(&queue->_xmit_lock); 854 spin_lock_irq(&queue->_xmit_lock);
855 trans_timeout = queue->trans_timeout; 855 trans_timeout = queue->trans_timeout;
856 spin_unlock_irq(&queue->_xmit_lock); 856 spin_unlock_irq(&queue->_xmit_lock);
857 857
858 return sprintf(buf, "%lu", trans_timeout); 858 return sprintf(buf, "%lu", trans_timeout);
859 } 859 }
860 860
861 static struct netdev_queue_attribute queue_trans_timeout = 861 static struct netdev_queue_attribute queue_trans_timeout =
862 __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL); 862 __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
863 863
864 #ifdef CONFIG_BQL 864 #ifdef CONFIG_BQL
865 /* 865 /*
866 * Byte queue limits sysfs structures and functions. 866 * Byte queue limits sysfs structures and functions.
867 */ 867 */
868 static ssize_t bql_show(char *buf, unsigned int value) 868 static ssize_t bql_show(char *buf, unsigned int value)
869 { 869 {
870 return sprintf(buf, "%u\n", value); 870 return sprintf(buf, "%u\n", value);
871 } 871 }
872 872
873 static ssize_t bql_set(const char *buf, const size_t count, 873 static ssize_t bql_set(const char *buf, const size_t count,
874 unsigned int *pvalue) 874 unsigned int *pvalue)
875 { 875 {
876 unsigned int value; 876 unsigned int value;
877 int err; 877 int err;
878 878
879 if (!strcmp(buf, "max") || !strcmp(buf, "max\n")) 879 if (!strcmp(buf, "max") || !strcmp(buf, "max\n"))
880 value = DQL_MAX_LIMIT; 880 value = DQL_MAX_LIMIT;
881 else { 881 else {
882 err = kstrtouint(buf, 10, &value); 882 err = kstrtouint(buf, 10, &value);
883 if (err < 0) 883 if (err < 0)
884 return err; 884 return err;
885 if (value > DQL_MAX_LIMIT) 885 if (value > DQL_MAX_LIMIT)
886 return -EINVAL; 886 return -EINVAL;
887 } 887 }
888 888
889 *pvalue = value; 889 *pvalue = value;
890 890
891 return count; 891 return count;
892 } 892 }
893 893
894 static ssize_t bql_show_hold_time(struct netdev_queue *queue, 894 static ssize_t bql_show_hold_time(struct netdev_queue *queue,
895 struct netdev_queue_attribute *attr, 895 struct netdev_queue_attribute *attr,
896 char *buf) 896 char *buf)
897 { 897 {
898 struct dql *dql = &queue->dql; 898 struct dql *dql = &queue->dql;
899 899
900 return sprintf(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time)); 900 return sprintf(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
901 } 901 }
902 902
903 static ssize_t bql_set_hold_time(struct netdev_queue *queue, 903 static ssize_t bql_set_hold_time(struct netdev_queue *queue,
904 struct netdev_queue_attribute *attribute, 904 struct netdev_queue_attribute *attribute,
905 const char *buf, size_t len) 905 const char *buf, size_t len)
906 { 906 {
907 struct dql *dql = &queue->dql; 907 struct dql *dql = &queue->dql;
908 unsigned int value; 908 unsigned int value;
909 int err; 909 int err;
910 910
911 err = kstrtouint(buf, 10, &value); 911 err = kstrtouint(buf, 10, &value);
912 if (err < 0) 912 if (err < 0)
913 return err; 913 return err;
914 914
915 dql->slack_hold_time = msecs_to_jiffies(value); 915 dql->slack_hold_time = msecs_to_jiffies(value);
916 916
917 return len; 917 return len;
918 } 918 }
919 919
920 static struct netdev_queue_attribute bql_hold_time_attribute = 920 static struct netdev_queue_attribute bql_hold_time_attribute =
921 __ATTR(hold_time, S_IRUGO | S_IWUSR, bql_show_hold_time, 921 __ATTR(hold_time, S_IRUGO | S_IWUSR, bql_show_hold_time,
922 bql_set_hold_time); 922 bql_set_hold_time);
923 923
924 static ssize_t bql_show_inflight(struct netdev_queue *queue, 924 static ssize_t bql_show_inflight(struct netdev_queue *queue,
925 struct netdev_queue_attribute *attr, 925 struct netdev_queue_attribute *attr,
926 char *buf) 926 char *buf)
927 { 927 {
928 struct dql *dql = &queue->dql; 928 struct dql *dql = &queue->dql;
929 929
930 return sprintf(buf, "%u\n", dql->num_queued - dql->num_completed); 930 return sprintf(buf, "%u\n", dql->num_queued - dql->num_completed);
931 } 931 }
932 932
933 static struct netdev_queue_attribute bql_inflight_attribute = 933 static struct netdev_queue_attribute bql_inflight_attribute =
934 __ATTR(inflight, S_IRUGO, bql_show_inflight, NULL); 934 __ATTR(inflight, S_IRUGO, bql_show_inflight, NULL);
935 935
936 #define BQL_ATTR(NAME, FIELD) \ 936 #define BQL_ATTR(NAME, FIELD) \
937 static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \ 937 static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \
938 struct netdev_queue_attribute *attr, \ 938 struct netdev_queue_attribute *attr, \
939 char *buf) \ 939 char *buf) \
940 { \ 940 { \
941 return bql_show(buf, queue->dql.FIELD); \ 941 return bql_show(buf, queue->dql.FIELD); \
942 } \ 942 } \
943 \ 943 \
944 static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \ 944 static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \
945 struct netdev_queue_attribute *attr, \ 945 struct netdev_queue_attribute *attr, \
946 const char *buf, size_t len) \ 946 const char *buf, size_t len) \
947 { \ 947 { \
948 return bql_set(buf, len, &queue->dql.FIELD); \ 948 return bql_set(buf, len, &queue->dql.FIELD); \
949 } \ 949 } \
950 \ 950 \
951 static struct netdev_queue_attribute bql_ ## NAME ## _attribute = \ 951 static struct netdev_queue_attribute bql_ ## NAME ## _attribute = \
952 __ATTR(NAME, S_IRUGO | S_IWUSR, bql_show_ ## NAME, \ 952 __ATTR(NAME, S_IRUGO | S_IWUSR, bql_show_ ## NAME, \
953 bql_set_ ## NAME); 953 bql_set_ ## NAME);
954 954
955 BQL_ATTR(limit, limit) 955 BQL_ATTR(limit, limit)
956 BQL_ATTR(limit_max, max_limit) 956 BQL_ATTR(limit_max, max_limit)
957 BQL_ATTR(limit_min, min_limit) 957 BQL_ATTR(limit_min, min_limit)
958 958
959 static struct attribute *dql_attrs[] = { 959 static struct attribute *dql_attrs[] = {
960 &bql_limit_attribute.attr, 960 &bql_limit_attribute.attr,
961 &bql_limit_max_attribute.attr, 961 &bql_limit_max_attribute.attr,
962 &bql_limit_min_attribute.attr, 962 &bql_limit_min_attribute.attr,
963 &bql_hold_time_attribute.attr, 963 &bql_hold_time_attribute.attr,
964 &bql_inflight_attribute.attr, 964 &bql_inflight_attribute.attr,
965 NULL 965 NULL
966 }; 966 };
967 967
968 static struct attribute_group dql_group = { 968 static struct attribute_group dql_group = {
969 .name = "byte_queue_limits", 969 .name = "byte_queue_limits",
970 .attrs = dql_attrs, 970 .attrs = dql_attrs,
971 }; 971 };
972 #endif /* CONFIG_BQL */ 972 #endif /* CONFIG_BQL */
973 973
974 #ifdef CONFIG_XPS 974 #ifdef CONFIG_XPS
975 static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) 975 static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
976 { 976 {
977 struct net_device *dev = queue->dev; 977 struct net_device *dev = queue->dev;
978 int i; 978 int i;
979 979
980 for (i = 0; i < dev->num_tx_queues; i++) 980 for (i = 0; i < dev->num_tx_queues; i++)
981 if (queue == &dev->_tx[i]) 981 if (queue == &dev->_tx[i])
982 break; 982 break;
983 983
984 BUG_ON(i >= dev->num_tx_queues); 984 BUG_ON(i >= dev->num_tx_queues);
985 985
986 return i; 986 return i;
987 } 987 }
988 988
989 989
990 static ssize_t show_xps_map(struct netdev_queue *queue, 990 static ssize_t show_xps_map(struct netdev_queue *queue,
991 struct netdev_queue_attribute *attribute, char *buf) 991 struct netdev_queue_attribute *attribute, char *buf)
992 { 992 {
993 struct net_device *dev = queue->dev; 993 struct net_device *dev = queue->dev;
994 struct xps_dev_maps *dev_maps; 994 struct xps_dev_maps *dev_maps;
995 cpumask_var_t mask; 995 cpumask_var_t mask;
996 unsigned long index; 996 unsigned long index;
997 size_t len = 0; 997 size_t len = 0;
998 int i; 998 int i;
999 999
1000 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) 1000 if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
1001 return -ENOMEM; 1001 return -ENOMEM;
1002 1002
1003 index = get_netdev_queue_index(queue); 1003 index = get_netdev_queue_index(queue);
1004 1004
1005 rcu_read_lock(); 1005 rcu_read_lock();
1006 dev_maps = rcu_dereference(dev->xps_maps); 1006 dev_maps = rcu_dereference(dev->xps_maps);
1007 if (dev_maps) { 1007 if (dev_maps) {
1008 for_each_possible_cpu(i) { 1008 for_each_possible_cpu(i) {
1009 struct xps_map *map = 1009 struct xps_map *map =
1010 rcu_dereference(dev_maps->cpu_map[i]); 1010 rcu_dereference(dev_maps->cpu_map[i]);
1011 if (map) { 1011 if (map) {
1012 int j; 1012 int j;
1013 for (j = 0; j < map->len; j++) { 1013 for (j = 0; j < map->len; j++) {
1014 if (map->queues[j] == index) { 1014 if (map->queues[j] == index) {
1015 cpumask_set_cpu(i, mask); 1015 cpumask_set_cpu(i, mask);
1016 break; 1016 break;
1017 } 1017 }
1018 } 1018 }
1019 } 1019 }
1020 } 1020 }
1021 } 1021 }
1022 rcu_read_unlock(); 1022 rcu_read_unlock();
1023 1023
1024 len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); 1024 len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
1025 if (PAGE_SIZE - len < 3) { 1025 if (PAGE_SIZE - len < 3) {
1026 free_cpumask_var(mask); 1026 free_cpumask_var(mask);
1027 return -EINVAL; 1027 return -EINVAL;
1028 } 1028 }
1029 1029
1030 free_cpumask_var(mask); 1030 free_cpumask_var(mask);
1031 len += sprintf(buf + len, "\n"); 1031 len += sprintf(buf + len, "\n");
1032 return len; 1032 return len;
1033 } 1033 }
1034 1034
1035 static ssize_t store_xps_map(struct netdev_queue *queue, 1035 static ssize_t store_xps_map(struct netdev_queue *queue,
1036 struct netdev_queue_attribute *attribute, 1036 struct netdev_queue_attribute *attribute,
1037 const char *buf, size_t len) 1037 const char *buf, size_t len)
1038 { 1038 {
1039 struct net_device *dev = queue->dev; 1039 struct net_device *dev = queue->dev;
1040 unsigned long index; 1040 unsigned long index;
1041 cpumask_var_t mask; 1041 cpumask_var_t mask;
1042 int err; 1042 int err;
1043 1043
1044 if (!capable(CAP_NET_ADMIN)) 1044 if (!capable(CAP_NET_ADMIN))
1045 return -EPERM; 1045 return -EPERM;
1046 1046
1047 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 1047 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
1048 return -ENOMEM; 1048 return -ENOMEM;
1049 1049
1050 index = get_netdev_queue_index(queue); 1050 index = get_netdev_queue_index(queue);
1051 1051
1052 err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); 1052 err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
1053 if (err) { 1053 if (err) {
1054 free_cpumask_var(mask); 1054 free_cpumask_var(mask);
1055 return err; 1055 return err;
1056 } 1056 }
1057 1057
1058 err = netif_set_xps_queue(dev, mask, index); 1058 err = netif_set_xps_queue(dev, mask, index);
1059 1059
1060 free_cpumask_var(mask); 1060 free_cpumask_var(mask);
1061 1061
1062 return err ? : len; 1062 return err ? : len;
1063 } 1063 }
1064 1064
1065 static struct netdev_queue_attribute xps_cpus_attribute = 1065 static struct netdev_queue_attribute xps_cpus_attribute =
1066 __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map); 1066 __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
1067 #endif /* CONFIG_XPS */ 1067 #endif /* CONFIG_XPS */
1068 1068
1069 static struct attribute *netdev_queue_default_attrs[] = { 1069 static struct attribute *netdev_queue_default_attrs[] = {
1070 &queue_trans_timeout.attr, 1070 &queue_trans_timeout.attr,
1071 #ifdef CONFIG_XPS 1071 #ifdef CONFIG_XPS
1072 &xps_cpus_attribute.attr, 1072 &xps_cpus_attribute.attr,
1073 #endif 1073 #endif
1074 NULL 1074 NULL
1075 }; 1075 };
1076 1076
1077 static void netdev_queue_release(struct kobject *kobj) 1077 static void netdev_queue_release(struct kobject *kobj)
1078 { 1078 {
1079 struct netdev_queue *queue = to_netdev_queue(kobj); 1079 struct netdev_queue *queue = to_netdev_queue(kobj);
1080 1080
1081 memset(kobj, 0, sizeof(*kobj)); 1081 memset(kobj, 0, sizeof(*kobj));
1082 dev_put(queue->dev); 1082 dev_put(queue->dev);
1083 } 1083 }
1084 1084
1085 static struct kobj_type netdev_queue_ktype = { 1085 static struct kobj_type netdev_queue_ktype = {
1086 .sysfs_ops = &netdev_queue_sysfs_ops, 1086 .sysfs_ops = &netdev_queue_sysfs_ops,
1087 .release = netdev_queue_release, 1087 .release = netdev_queue_release,
1088 .default_attrs = netdev_queue_default_attrs, 1088 .default_attrs = netdev_queue_default_attrs,
1089 }; 1089 };
1090 1090
1091 static int netdev_queue_add_kobject(struct net_device *net, int index) 1091 static int netdev_queue_add_kobject(struct net_device *net, int index)
1092 { 1092 {
1093 struct netdev_queue *queue = net->_tx + index; 1093 struct netdev_queue *queue = net->_tx + index;
1094 struct kobject *kobj = &queue->kobj; 1094 struct kobject *kobj = &queue->kobj;
1095 int error = 0; 1095 int error = 0;
1096 1096
1097 kobj->kset = net->queues_kset; 1097 kobj->kset = net->queues_kset;
1098 error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, 1098 error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
1099 "tx-%u", index); 1099 "tx-%u", index);
1100 if (error) 1100 if (error)
1101 goto exit; 1101 goto exit;
1102 1102
1103 #ifdef CONFIG_BQL 1103 #ifdef CONFIG_BQL
1104 error = sysfs_create_group(kobj, &dql_group); 1104 error = sysfs_create_group(kobj, &dql_group);
1105 if (error) 1105 if (error)
1106 goto exit; 1106 goto exit;
1107 #endif 1107 #endif
1108 1108
1109 kobject_uevent(kobj, KOBJ_ADD); 1109 kobject_uevent(kobj, KOBJ_ADD);
1110 dev_hold(queue->dev); 1110 dev_hold(queue->dev);
1111 1111
1112 return 0; 1112 return 0;
1113 exit: 1113 exit:
1114 kobject_put(kobj); 1114 kobject_put(kobj);
1115 return error; 1115 return error;
1116 } 1116 }
1117 #endif /* CONFIG_SYSFS */ 1117 #endif /* CONFIG_SYSFS */
1118 1118
1119 int 1119 int
1120 netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) 1120 netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
1121 { 1121 {
1122 #ifdef CONFIG_SYSFS 1122 #ifdef CONFIG_SYSFS
1123 int i; 1123 int i;
1124 int error = 0; 1124 int error = 0;
1125 1125
1126 for (i = old_num; i < new_num; i++) { 1126 for (i = old_num; i < new_num; i++) {
1127 error = netdev_queue_add_kobject(net, i); 1127 error = netdev_queue_add_kobject(net, i);
1128 if (error) { 1128 if (error) {
1129 new_num = old_num; 1129 new_num = old_num;
1130 break; 1130 break;
1131 } 1131 }
1132 } 1132 }
1133 1133
1134 while (--i >= new_num) { 1134 while (--i >= new_num) {
1135 struct netdev_queue *queue = net->_tx + i; 1135 struct netdev_queue *queue = net->_tx + i;
1136 1136
1137 #ifdef CONFIG_BQL 1137 #ifdef CONFIG_BQL
1138 sysfs_remove_group(&queue->kobj, &dql_group); 1138 sysfs_remove_group(&queue->kobj, &dql_group);
1139 #endif 1139 #endif
1140 kobject_put(&queue->kobj); 1140 kobject_put(&queue->kobj);
1141 } 1141 }
1142 1142
1143 return error; 1143 return error;
1144 #else 1144 #else
1145 return 0; 1145 return 0;
1146 #endif /* CONFIG_SYSFS */ 1146 #endif /* CONFIG_SYSFS */
1147 } 1147 }
1148 1148
1149 static int register_queue_kobjects(struct net_device *net) 1149 static int register_queue_kobjects(struct net_device *net)
1150 { 1150 {
1151 int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0; 1151 int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;
1152 1152
1153 #ifdef CONFIG_SYSFS 1153 #ifdef CONFIG_SYSFS
1154 net->queues_kset = kset_create_and_add("queues", 1154 net->queues_kset = kset_create_and_add("queues",
1155 NULL, &net->dev.kobj); 1155 NULL, &net->dev.kobj);
1156 if (!net->queues_kset) 1156 if (!net->queues_kset)
1157 return -ENOMEM; 1157 return -ENOMEM;
1158 #endif 1158 #endif
1159 1159
1160 #ifdef CONFIG_RPS 1160 #ifdef CONFIG_RPS
1161 real_rx = net->real_num_rx_queues; 1161 real_rx = net->real_num_rx_queues;
1162 #endif 1162 #endif
1163 real_tx = net->real_num_tx_queues; 1163 real_tx = net->real_num_tx_queues;
1164 1164
1165 error = net_rx_queue_update_kobjects(net, 0, real_rx); 1165 error = net_rx_queue_update_kobjects(net, 0, real_rx);
1166 if (error) 1166 if (error)
1167 goto error; 1167 goto error;
1168 rxq = real_rx; 1168 rxq = real_rx;
1169 1169
1170 error = netdev_queue_update_kobjects(net, 0, real_tx); 1170 error = netdev_queue_update_kobjects(net, 0, real_tx);
1171 if (error) 1171 if (error)
1172 goto error; 1172 goto error;
1173 txq = real_tx; 1173 txq = real_tx;
1174 1174
1175 return 0; 1175 return 0;
1176 1176
1177 error: 1177 error:
1178 netdev_queue_update_kobjects(net, txq, 0); 1178 netdev_queue_update_kobjects(net, txq, 0);
1179 net_rx_queue_update_kobjects(net, rxq, 0); 1179 net_rx_queue_update_kobjects(net, rxq, 0);
1180 return error; 1180 return error;
1181 } 1181 }
1182 1182
1183 static void remove_queue_kobjects(struct net_device *net) 1183 static void remove_queue_kobjects(struct net_device *net)
1184 { 1184 {
1185 int real_rx = 0, real_tx = 0; 1185 int real_rx = 0, real_tx = 0;
1186 1186
1187 #ifdef CONFIG_RPS 1187 #ifdef CONFIG_RPS
1188 real_rx = net->real_num_rx_queues; 1188 real_rx = net->real_num_rx_queues;
1189 #endif 1189 #endif
1190 real_tx = net->real_num_tx_queues; 1190 real_tx = net->real_num_tx_queues;
1191 1191
1192 net_rx_queue_update_kobjects(net, real_rx, 0); 1192 net_rx_queue_update_kobjects(net, real_rx, 0);
1193 netdev_queue_update_kobjects(net, real_tx, 0); 1193 netdev_queue_update_kobjects(net, real_tx, 0);
1194 #ifdef CONFIG_SYSFS 1194 #ifdef CONFIG_SYSFS
1195 kset_unregister(net->queues_kset); 1195 kset_unregister(net->queues_kset);
1196 #endif 1196 #endif
1197 } 1197 }
1198 1198
1199 static bool net_current_may_mount(void) 1199 static bool net_current_may_mount(void)
1200 { 1200 {
1201 struct net *net = current->nsproxy->net_ns; 1201 struct net *net = current->nsproxy->net_ns;
1202 1202
1203 return ns_capable(net->user_ns, CAP_SYS_ADMIN); 1203 return ns_capable(net->user_ns, CAP_SYS_ADMIN);
1204 } 1204 }
1205 1205
1206 static void *net_grab_current_ns(void) 1206 static void *net_grab_current_ns(void)
1207 { 1207 {
1208 struct net *ns = current->nsproxy->net_ns; 1208 struct net *ns = current->nsproxy->net_ns;
1209 #ifdef CONFIG_NET_NS 1209 #ifdef CONFIG_NET_NS
1210 if (ns) 1210 if (ns)
1211 atomic_inc(&ns->passive); 1211 atomic_inc(&ns->passive);
1212 #endif 1212 #endif
1213 return ns; 1213 return ns;
1214 } 1214 }
1215 1215
1216 static const void *net_initial_ns(void) 1216 static const void *net_initial_ns(void)
1217 { 1217 {
1218 return &init_net; 1218 return &init_net;
1219 } 1219 }
1220 1220
1221 static const void *net_netlink_ns(struct sock *sk) 1221 static const void *net_netlink_ns(struct sock *sk)
1222 { 1222 {
1223 return sock_net(sk); 1223 return sock_net(sk);
1224 } 1224 }
1225 1225
1226 struct kobj_ns_type_operations net_ns_type_operations = { 1226 struct kobj_ns_type_operations net_ns_type_operations = {
1227 .type = KOBJ_NS_TYPE_NET, 1227 .type = KOBJ_NS_TYPE_NET,
1228 .current_may_mount = net_current_may_mount, 1228 .current_may_mount = net_current_may_mount,
1229 .grab_current_ns = net_grab_current_ns, 1229 .grab_current_ns = net_grab_current_ns,
1230 .netlink_ns = net_netlink_ns, 1230 .netlink_ns = net_netlink_ns,
1231 .initial_ns = net_initial_ns, 1231 .initial_ns = net_initial_ns,
1232 .drop_ns = net_drop_ns, 1232 .drop_ns = net_drop_ns,
1233 }; 1233 };
1234 EXPORT_SYMBOL_GPL(net_ns_type_operations); 1234 EXPORT_SYMBOL_GPL(net_ns_type_operations);
1235 1235
1236 static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) 1236 static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
1237 { 1237 {
1238 struct net_device *dev = to_net_dev(d); 1238 struct net_device *dev = to_net_dev(d);
1239 int retval; 1239 int retval;
1240 1240
1241 /* pass interface to uevent. */ 1241 /* pass interface to uevent. */
1242 retval = add_uevent_var(env, "INTERFACE=%s", dev->name); 1242 retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
1243 if (retval) 1243 if (retval)
1244 goto exit; 1244 goto exit;
1245 1245
1246 /* pass ifindex to uevent. 1246 /* pass ifindex to uevent.
1247 * ifindex is useful as it won't change (interface name may change) 1247 * ifindex is useful as it won't change (interface name may change)
1248 * and is what RtNetlink uses natively. */ 1248 * and is what RtNetlink uses natively. */
1249 retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex); 1249 retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex);
1250 1250
1251 exit: 1251 exit:
1252 return retval; 1252 return retval;
1253 } 1253 }
1254 1254
1255 /* 1255 /*
1256 * netdev_release -- destroy and free a dead device. 1256 * netdev_release -- destroy and free a dead device.
1257 * Called when last reference to device kobject is gone. 1257 * Called when last reference to device kobject is gone.
1258 */ 1258 */
1259 static void netdev_release(struct device *d) 1259 static void netdev_release(struct device *d)
1260 { 1260 {
1261 struct net_device *dev = to_net_dev(d); 1261 struct net_device *dev = to_net_dev(d);
1262 1262
1263 BUG_ON(dev->reg_state != NETREG_RELEASED); 1263 BUG_ON(dev->reg_state != NETREG_RELEASED);
1264 1264
1265 kfree(dev->ifalias); 1265 kfree(dev->ifalias);
1266 netdev_freemem(dev); 1266 netdev_freemem(dev);
1267 } 1267 }
1268 1268
1269 static const void *net_namespace(struct device *d) 1269 static const void *net_namespace(struct device *d)
1270 { 1270 {
1271 struct net_device *dev; 1271 struct net_device *dev;
1272 dev = container_of(d, struct net_device, dev); 1272 dev = container_of(d, struct net_device, dev);
1273 return dev_net(dev); 1273 return dev_net(dev);
1274 } 1274 }
1275 1275
1276 static struct class net_class = { 1276 static struct class net_class = {
1277 .name = "net", 1277 .name = "net",
1278 .dev_release = netdev_release, 1278 .dev_release = netdev_release,
1279 .dev_groups = net_class_groups, 1279 .dev_groups = net_class_groups,
1280 .dev_uevent = netdev_uevent, 1280 .dev_uevent = netdev_uevent,
1281 .ns_type = &net_ns_type_operations, 1281 .ns_type = &net_ns_type_operations,
1282 .namespace = net_namespace, 1282 .namespace = net_namespace,
1283 }; 1283 };
1284 1284
1285 /* Delete sysfs entries but hold kobject reference until after all 1285 /* Delete sysfs entries but hold kobject reference until after all
1286 * netdev references are gone. 1286 * netdev references are gone.
1287 */ 1287 */
1288 void netdev_unregister_kobject(struct net_device * net) 1288 void netdev_unregister_kobject(struct net_device * net)
1289 { 1289 {
1290 struct device *dev = &(net->dev); 1290 struct device *dev = &(net->dev);
1291 1291
1292 kobject_get(&dev->kobj); 1292 kobject_get(&dev->kobj);
1293 1293
1294 remove_queue_kobjects(net); 1294 remove_queue_kobjects(net);
1295 1295
1296 pm_runtime_set_memalloc_noio(dev, false); 1296 pm_runtime_set_memalloc_noio(dev, false);
1297 1297
1298 device_del(dev); 1298 device_del(dev);
1299 } 1299 }
1300 1300
1301 /* Create sysfs entries for network device. */ 1301 /* Create sysfs entries for network device. */
1302 int netdev_register_kobject(struct net_device *net) 1302 int netdev_register_kobject(struct net_device *net)
1303 { 1303 {
1304 struct device *dev = &(net->dev); 1304 struct device *dev = &(net->dev);
1305 const struct attribute_group **groups = net->sysfs_groups; 1305 const struct attribute_group **groups = net->sysfs_groups;
1306 int error = 0; 1306 int error = 0;
1307 1307
1308 device_initialize(dev); 1308 device_initialize(dev);
1309 dev->class = &net_class; 1309 dev->class = &net_class;
1310 dev->platform_data = net; 1310 dev->platform_data = net;
1311 dev->groups = groups; 1311 dev->groups = groups;
1312 1312
1313 dev_set_name(dev, "%s", net->name); 1313 dev_set_name(dev, "%s", net->name);
1314 1314
1315 #ifdef CONFIG_SYSFS 1315 #ifdef CONFIG_SYSFS
1316 /* Allow for a device specific group */ 1316 /* Allow for a device specific group */
1317 if (*groups) 1317 if (*groups)
1318 groups++; 1318 groups++;
1319 1319
1320 *groups++ = &netstat_group; 1320 *groups++ = &netstat_group;
1321 1321
1322 #if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211) 1322 #if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211)
1323 if (net->ieee80211_ptr) 1323 if (net->ieee80211_ptr)
1324 *groups++ = &wireless_group; 1324 *groups++ = &wireless_group;
1325 #if IS_ENABLED(CONFIG_WIRELESS_EXT) 1325 #if IS_ENABLED(CONFIG_WIRELESS_EXT)
1326 else if (net->wireless_handlers) 1326 else if (net->wireless_handlers)
1327 *groups++ = &wireless_group; 1327 *groups++ = &wireless_group;
1328 #endif 1328 #endif
1329 #endif 1329 #endif
1330 #endif /* CONFIG_SYSFS */ 1330 #endif /* CONFIG_SYSFS */
1331 1331
1332 error = device_add(dev); 1332 error = device_add(dev);
1333 if (error) 1333 if (error)
1334 return error; 1334 return error;
1335 1335
1336 error = register_queue_kobjects(net); 1336 error = register_queue_kobjects(net);
1337 if (error) { 1337 if (error) {
1338 device_del(dev); 1338 device_del(dev);
1339 return error; 1339 return error;
1340 } 1340 }
1341 1341
1342 pm_runtime_set_memalloc_noio(dev, true); 1342 pm_runtime_set_memalloc_noio(dev, true);
1343 1343
1344 return error; 1344 return error;
1345 } 1345 }
1346 1346
1347 int netdev_class_create_file_ns(struct class_attribute *class_attr, 1347 int netdev_class_create_file_ns(struct class_attribute *class_attr,
1348 const void *ns) 1348 const void *ns)
1349 { 1349 {
1350 return class_create_file_ns(&net_class, class_attr, ns); 1350 return class_create_file_ns(&net_class, class_attr, ns);
1351 } 1351 }
1352 EXPORT_SYMBOL(netdev_class_create_file_ns); 1352 EXPORT_SYMBOL(netdev_class_create_file_ns);
1353 1353
1354 void netdev_class_remove_file_ns(struct class_attribute *class_attr, 1354 void netdev_class_remove_file_ns(struct class_attribute *class_attr,
1355 const void *ns) 1355 const void *ns)
1356 { 1356 {
1357 class_remove_file_ns(&net_class, class_attr, ns); 1357 class_remove_file_ns(&net_class, class_attr, ns);
1358 } 1358 }
1359 EXPORT_SYMBOL(netdev_class_remove_file_ns); 1359 EXPORT_SYMBOL(netdev_class_remove_file_ns);
1360 1360
1361 int netdev_kobject_init(void) 1361 int netdev_kobject_init(void)
1362 { 1362 {
1363 kobj_ns_type_register(&net_ns_type_operations); 1363 kobj_ns_type_register(&net_ns_type_operations);
1364 return class_register(&net_class); 1364 return class_register(&net_class);
1365 } 1365 }
1366 1366
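For reference, the kobject and attribute names registered in this file determine the sysfs paths userspace sees: per-device counters under statistics/, and per-queue files under queues/rx-<n>/ and queues/tx-<n>/ (rps_cpus, rps_flow_cnt, tx_timeout, xps_cpus, byte_queue_limits/*). A minimal userspace sketch follows; the interface name eth0 and queue index 0 are assumptions for illustration, and writes require CAP_NET_ADMIN as enforced in the store handlers above.

/* Minimal userspace sketch of the sysfs files registered in net-sysfs.c.
 * Paths are built from the names used above ("queues", "rx-%u", "tx-%u",
 * "rps_cpus", "byte_queue_limits", "statistics"); "eth0" and queue 0 are
 * illustrative assumptions only.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static void show(const char *path)
{
        char buf[256];
        ssize_t n;
        int fd = open(path, O_RDONLY);

        if (fd < 0) {
                perror(path);
                return;
        }
        n = read(fd, buf, sizeof(buf) - 1);
        if (n > 0) {
                buf[n] = '\0';
                printf("%-55s %s\n", path, buf);
        }
        close(fd);
}

int main(void)
{
        int fd;

        /* read-only examples: per-queue attributes and device statistics */
        show("/sys/class/net/eth0/queues/rx-0/rps_cpus");
        show("/sys/class/net/eth0/queues/rx-0/rps_flow_cnt");
        show("/sys/class/net/eth0/queues/tx-0/tx_timeout");
        show("/sys/class/net/eth0/queues/tx-0/byte_queue_limits/inflight");
        show("/sys/class/net/eth0/statistics/rx_packets");

        /* store_rps_map() parses a hex CPU bitmap; needs CAP_NET_ADMIN */
        fd = open("/sys/class/net/eth0/queues/rx-0/rps_cpus", O_WRONLY);
        if (fd >= 0) {
                if (write(fd, "f\n", 2) < 0)    /* steer rx-0 to CPUs 0-3 */
                        perror("rps_cpus");
                close(fd);
        }
        return 0;
}
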
net/core/netprio_cgroup.c
1 /* 1 /*
2 * net/core/netprio_cgroup.c Priority Control Group 2 * net/core/netprio_cgroup.c Priority Control Group
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License 5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version. 7 * 2 of the License, or (at your option) any later version.
8 * 8 *
9 * Authors: Neil Horman <nhorman@tuxdriver.com> 9 * Authors: Neil Horman <nhorman@tuxdriver.com>
10 */ 10 */
11 11
12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 13
14 #include <linux/module.h> 14 #include <linux/module.h>
15 #include <linux/slab.h> 15 #include <linux/slab.h>
16 #include <linux/types.h> 16 #include <linux/types.h>
17 #include <linux/string.h> 17 #include <linux/string.h>
18 #include <linux/errno.h> 18 #include <linux/errno.h>
19 #include <linux/skbuff.h> 19 #include <linux/skbuff.h>
20 #include <linux/cgroup.h> 20 #include <linux/cgroup.h>
21 #include <linux/rcupdate.h> 21 #include <linux/rcupdate.h>
22 #include <linux/atomic.h> 22 #include <linux/atomic.h>
23 #include <net/rtnetlink.h> 23 #include <net/rtnetlink.h>
24 #include <net/pkt_cls.h> 24 #include <net/pkt_cls.h>
25 #include <net/sock.h> 25 #include <net/sock.h>
26 #include <net/netprio_cgroup.h> 26 #include <net/netprio_cgroup.h>
27 27
28 #include <linux/fdtable.h> 28 #include <linux/fdtable.h>
29 29
30 #define PRIOMAP_MIN_SZ 128 30 #define PRIOMAP_MIN_SZ 128
31 31
32 /* 32 /*
33 * Extend @dev->priomap so that it's large enough to accomodate 33 * Extend @dev->priomap so that it's large enough to accommodate
34 * @target_idx. @dev->priomap.priomap_len > @target_idx after successful 34 * @target_idx. @dev->priomap.priomap_len > @target_idx after successful
35 * return. Must be called under rtnl lock. 35 * return. Must be called under rtnl lock.
36 */ 36 */
37 static int extend_netdev_table(struct net_device *dev, u32 target_idx) 37 static int extend_netdev_table(struct net_device *dev, u32 target_idx)
38 { 38 {
39 struct netprio_map *old, *new; 39 struct netprio_map *old, *new;
40 size_t new_sz, new_len; 40 size_t new_sz, new_len;
41 41
42 /* is the existing priomap large enough? */ 42 /* is the existing priomap large enough? */
43 old = rtnl_dereference(dev->priomap); 43 old = rtnl_dereference(dev->priomap);
44 if (old && old->priomap_len > target_idx) 44 if (old && old->priomap_len > target_idx)
45 return 0; 45 return 0;
46 46
47 /* 47 /*
48 * Determine the new size. Let's keep it power-of-two. We start 48 * Determine the new size. Let's keep it power-of-two. We start
49 * from PRIOMAP_MIN_SZ and double it until it's large enough to 49 * from PRIOMAP_MIN_SZ and double it until it's large enough to
50 * accommodate @target_idx. 50 * accommodate @target_idx.
51 */ 51 */
52 new_sz = PRIOMAP_MIN_SZ; 52 new_sz = PRIOMAP_MIN_SZ;
53 while (true) { 53 while (true) {
54 new_len = (new_sz - offsetof(struct netprio_map, priomap)) / 54 new_len = (new_sz - offsetof(struct netprio_map, priomap)) /
55 sizeof(new->priomap[0]); 55 sizeof(new->priomap[0]);
56 if (new_len > target_idx) 56 if (new_len > target_idx)
57 break; 57 break;
58 new_sz *= 2; 58 new_sz *= 2;
59 /* overflowed? */ 59 /* overflowed? */
60 if (WARN_ON(new_sz < PRIOMAP_MIN_SZ)) 60 if (WARN_ON(new_sz < PRIOMAP_MIN_SZ))
61 return -ENOSPC; 61 return -ENOSPC;
62 } 62 }
63 63
64 /* allocate & copy */ 64 /* allocate & copy */
65 new = kzalloc(new_sz, GFP_KERNEL); 65 new = kzalloc(new_sz, GFP_KERNEL);
66 if (!new) 66 if (!new)
67 return -ENOMEM; 67 return -ENOMEM;
68 68
69 if (old) 69 if (old)
70 memcpy(new->priomap, old->priomap, 70 memcpy(new->priomap, old->priomap,
71 old->priomap_len * sizeof(old->priomap[0])); 71 old->priomap_len * sizeof(old->priomap[0]));
72 72
73 new->priomap_len = new_len; 73 new->priomap_len = new_len;
74 74
75 /* install the new priomap */ 75 /* install the new priomap */
76 rcu_assign_pointer(dev->priomap, new); 76 rcu_assign_pointer(dev->priomap, new);
77 if (old) 77 if (old)
78 kfree_rcu(old, rcu); 78 kfree_rcu(old, rcu);
79 return 0; 79 return 0;
80 } 80 }
81 81
82 /** 82 /**
83 * netprio_prio - return the effective netprio of a cgroup-net_device pair 83 * netprio_prio - return the effective netprio of a cgroup-net_device pair
84 * @css: css part of the target pair 84 * @css: css part of the target pair
85 * @dev: net_device part of the target pair 85 * @dev: net_device part of the target pair
86 * 86 *
87 * Should be called under RCU read or rtnl lock. 87 * Should be called under RCU read or rtnl lock.
88 */ 88 */
89 static u32 netprio_prio(struct cgroup_subsys_state *css, struct net_device *dev) 89 static u32 netprio_prio(struct cgroup_subsys_state *css, struct net_device *dev)
90 { 90 {
91 struct netprio_map *map = rcu_dereference_rtnl(dev->priomap); 91 struct netprio_map *map = rcu_dereference_rtnl(dev->priomap);
92 int id = css->cgroup->id; 92 int id = css->cgroup->id;
93 93
94 if (map && id < map->priomap_len) 94 if (map && id < map->priomap_len)
95 return map->priomap[id]; 95 return map->priomap[id];
96 return 0; 96 return 0;
97 } 97 }
98 98
99 /** 99 /**
100 * netprio_set_prio - set netprio on a cgroup-net_device pair 100 * netprio_set_prio - set netprio on a cgroup-net_device pair
101 * @css: css part of the target pair 101 * @css: css part of the target pair
102 * @dev: net_device part of the target pair 102 * @dev: net_device part of the target pair
103 * @prio: prio to set 103 * @prio: prio to set
104 * 104 *
105 * Set netprio to @prio on @css-@dev pair. Should be called under rtnl 105 * Set netprio to @prio on @css-@dev pair. Should be called under rtnl
106 * lock and may fail under memory pressure for non-zero @prio. 106 * lock and may fail under memory pressure for non-zero @prio.
107 */ 107 */
108 static int netprio_set_prio(struct cgroup_subsys_state *css, 108 static int netprio_set_prio(struct cgroup_subsys_state *css,
109 struct net_device *dev, u32 prio) 109 struct net_device *dev, u32 prio)
110 { 110 {
111 struct netprio_map *map; 111 struct netprio_map *map;
112 int id = css->cgroup->id; 112 int id = css->cgroup->id;
113 int ret; 113 int ret;
114 114
115 /* avoid extending priomap for zero writes */ 115 /* avoid extending priomap for zero writes */
116 map = rtnl_dereference(dev->priomap); 116 map = rtnl_dereference(dev->priomap);
117 if (!prio && (!map || map->priomap_len <= id)) 117 if (!prio && (!map || map->priomap_len <= id))
118 return 0; 118 return 0;
119 119
120 ret = extend_netdev_table(dev, id); 120 ret = extend_netdev_table(dev, id);
121 if (ret) 121 if (ret)
122 return ret; 122 return ret;
123 123
124 map = rtnl_dereference(dev->priomap); 124 map = rtnl_dereference(dev->priomap);
125 map->priomap[id] = prio; 125 map->priomap[id] = prio;
126 return 0; 126 return 0;
127 } 127 }
128 128
129 static struct cgroup_subsys_state * 129 static struct cgroup_subsys_state *
130 cgrp_css_alloc(struct cgroup_subsys_state *parent_css) 130 cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
131 { 131 {
132 struct cgroup_subsys_state *css; 132 struct cgroup_subsys_state *css;
133 133
134 css = kzalloc(sizeof(*css), GFP_KERNEL); 134 css = kzalloc(sizeof(*css), GFP_KERNEL);
135 if (!css) 135 if (!css)
136 return ERR_PTR(-ENOMEM); 136 return ERR_PTR(-ENOMEM);
137 137
138 return css; 138 return css;
139 } 139 }
140 140
141 static int cgrp_css_online(struct cgroup_subsys_state *css) 141 static int cgrp_css_online(struct cgroup_subsys_state *css)
142 { 142 {
143 struct cgroup_subsys_state *parent_css = css_parent(css); 143 struct cgroup_subsys_state *parent_css = css_parent(css);
144 struct net_device *dev; 144 struct net_device *dev;
145 int ret = 0; 145 int ret = 0;
146 146
147 if (!parent_css) 147 if (!parent_css)
148 return 0; 148 return 0;
149 149
150 rtnl_lock(); 150 rtnl_lock();
151 /* 151 /*
152 * Inherit prios from the parent. As all prios are set during 152 * Inherit prios from the parent. As all prios are set during
153 * onlining, there is no need to clear them on offline. 153 * onlining, there is no need to clear them on offline.
154 */ 154 */
155 for_each_netdev(&init_net, dev) { 155 for_each_netdev(&init_net, dev) {
156 u32 prio = netprio_prio(parent_css, dev); 156 u32 prio = netprio_prio(parent_css, dev);
157 157
158 ret = netprio_set_prio(css, dev, prio); 158 ret = netprio_set_prio(css, dev, prio);
159 if (ret) 159 if (ret)
160 break; 160 break;
161 } 161 }
162 rtnl_unlock(); 162 rtnl_unlock();
163 return ret; 163 return ret;
164 } 164 }
165 165
166 static void cgrp_css_free(struct cgroup_subsys_state *css) 166 static void cgrp_css_free(struct cgroup_subsys_state *css)
167 { 167 {
168 kfree(css); 168 kfree(css);
169 } 169 }
170 170
171 static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft) 171 static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft)
172 { 172 {
173 return css->cgroup->id; 173 return css->cgroup->id;
174 } 174 }
175 175
176 static int read_priomap(struct cgroup_subsys_state *css, struct cftype *cft, 176 static int read_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
177 struct cgroup_map_cb *cb) 177 struct cgroup_map_cb *cb)
178 { 178 {
179 struct net_device *dev; 179 struct net_device *dev;
180 180
181 rcu_read_lock(); 181 rcu_read_lock();
182 for_each_netdev_rcu(&init_net, dev) 182 for_each_netdev_rcu(&init_net, dev)
183 cb->fill(cb, dev->name, netprio_prio(css, dev)); 183 cb->fill(cb, dev->name, netprio_prio(css, dev));
184 rcu_read_unlock(); 184 rcu_read_unlock();
185 return 0; 185 return 0;
186 } 186 }
187 187
188 static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft, 188 static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
189 const char *buffer) 189 const char *buffer)
190 { 190 {
191 char devname[IFNAMSIZ + 1]; 191 char devname[IFNAMSIZ + 1];
192 struct net_device *dev; 192 struct net_device *dev;
193 u32 prio; 193 u32 prio;
194 int ret; 194 int ret;
195 195
196 if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2) 196 if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
197 return -EINVAL; 197 return -EINVAL;
198 198
199 dev = dev_get_by_name(&init_net, devname); 199 dev = dev_get_by_name(&init_net, devname);
200 if (!dev) 200 if (!dev)
201 return -ENODEV; 201 return -ENODEV;
202 202
203 rtnl_lock(); 203 rtnl_lock();
204 204
205 ret = netprio_set_prio(css, dev, prio); 205 ret = netprio_set_prio(css, dev, prio);
206 206
207 rtnl_unlock(); 207 rtnl_unlock();
208 dev_put(dev); 208 dev_put(dev);
209 return ret; 209 return ret;
210 } 210 }
211 211
212 static int update_netprio(const void *v, struct file *file, unsigned n) 212 static int update_netprio(const void *v, struct file *file, unsigned n)
213 { 213 {
214 int err; 214 int err;
215 struct socket *sock = sock_from_file(file, &err); 215 struct socket *sock = sock_from_file(file, &err);
216 if (sock) 216 if (sock)
217 sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v; 217 sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v;
218 return 0; 218 return 0;
219 } 219 }
220 220
221 static void net_prio_attach(struct cgroup_subsys_state *css, 221 static void net_prio_attach(struct cgroup_subsys_state *css,
222 struct cgroup_taskset *tset) 222 struct cgroup_taskset *tset)
223 { 223 {
224 struct task_struct *p; 224 struct task_struct *p;
225 void *v = (void *)(unsigned long)css->cgroup->id; 225 void *v = (void *)(unsigned long)css->cgroup->id;
226 226
227 cgroup_taskset_for_each(p, css, tset) { 227 cgroup_taskset_for_each(p, css, tset) {
228 task_lock(p); 228 task_lock(p);
229 iterate_fd(p->files, 0, update_netprio, v); 229 iterate_fd(p->files, 0, update_netprio, v);
230 task_unlock(p); 230 task_unlock(p);
231 } 231 }
232 } 232 }
233 233
234 static struct cftype ss_files[] = { 234 static struct cftype ss_files[] = {
235 { 235 {
236 .name = "prioidx", 236 .name = "prioidx",
237 .read_u64 = read_prioidx, 237 .read_u64 = read_prioidx,
238 }, 238 },
239 { 239 {
240 .name = "ifpriomap", 240 .name = "ifpriomap",
241 .read_map = read_priomap, 241 .read_map = read_priomap,
242 .write_string = write_priomap, 242 .write_string = write_priomap,
243 }, 243 },
244 { } /* terminate */ 244 { } /* terminate */
245 }; 245 };
246 246
247 struct cgroup_subsys net_prio_subsys = { 247 struct cgroup_subsys net_prio_subsys = {
248 .name = "net_prio", 248 .name = "net_prio",
249 .css_alloc = cgrp_css_alloc, 249 .css_alloc = cgrp_css_alloc,
250 .css_online = cgrp_css_online, 250 .css_online = cgrp_css_online,
251 .css_free = cgrp_css_free, 251 .css_free = cgrp_css_free,
252 .attach = net_prio_attach, 252 .attach = net_prio_attach,
253 .subsys_id = net_prio_subsys_id, 253 .subsys_id = net_prio_subsys_id,
254 .base_cftypes = ss_files, 254 .base_cftypes = ss_files,
255 .module = THIS_MODULE, 255 .module = THIS_MODULE,
256 }; 256 };
257 257
258 static int netprio_device_event(struct notifier_block *unused, 258 static int netprio_device_event(struct notifier_block *unused,
259 unsigned long event, void *ptr) 259 unsigned long event, void *ptr)
260 { 260 {
261 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 261 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
262 struct netprio_map *old; 262 struct netprio_map *old;
263 263
264 /* 264 /*
265 * Note: this is called with rtnl_lock held, so we have update-side 265 * Note: this is called with rtnl_lock held, so we have update-side
266 * protection on our RCU assignments 266 * protection on our RCU assignments
267 */ 267 */
268 268
269 switch (event) { 269 switch (event) {
270 case NETDEV_UNREGISTER: 270 case NETDEV_UNREGISTER:
271 old = rtnl_dereference(dev->priomap); 271 old = rtnl_dereference(dev->priomap);
272 RCU_INIT_POINTER(dev->priomap, NULL); 272 RCU_INIT_POINTER(dev->priomap, NULL);
273 if (old) 273 if (old)
274 kfree_rcu(old, rcu); 274 kfree_rcu(old, rcu);
275 break; 275 break;
276 } 276 }
277 return NOTIFY_DONE; 277 return NOTIFY_DONE;
278 } 278 }
279 279
280 static struct notifier_block netprio_device_notifier = { 280 static struct notifier_block netprio_device_notifier = {
281 .notifier_call = netprio_device_event 281 .notifier_call = netprio_device_event
282 }; 282 };
283 283
284 static int __init init_cgroup_netprio(void) 284 static int __init init_cgroup_netprio(void)
285 { 285 {
286 int ret; 286 int ret;
287 287
288 ret = cgroup_load_subsys(&net_prio_subsys); 288 ret = cgroup_load_subsys(&net_prio_subsys);
289 if (ret) 289 if (ret)
290 goto out; 290 goto out;
291 291
292 register_netdevice_notifier(&netprio_device_notifier); 292 register_netdevice_notifier(&netprio_device_notifier);
293 293
294 out: 294 out:
295 return ret; 295 return ret;
296 } 296 }
297 297
298 static void __exit exit_cgroup_netprio(void) 298 static void __exit exit_cgroup_netprio(void)
299 { 299 {
300 struct netprio_map *old; 300 struct netprio_map *old;
301 struct net_device *dev; 301 struct net_device *dev;
302 302
303 unregister_netdevice_notifier(&netprio_device_notifier); 303 unregister_netdevice_notifier(&netprio_device_notifier);
304 304
305 cgroup_unload_subsys(&net_prio_subsys); 305 cgroup_unload_subsys(&net_prio_subsys);
306 306
307 rtnl_lock(); 307 rtnl_lock();
308 for_each_netdev(&init_net, dev) { 308 for_each_netdev(&init_net, dev) {
309 old = rtnl_dereference(dev->priomap); 309 old = rtnl_dereference(dev->priomap);
310 RCU_INIT_POINTER(dev->priomap, NULL); 310 RCU_INIT_POINTER(dev->priomap, NULL);
311 if (old) 311 if (old)
312 kfree_rcu(old, rcu); 312 kfree_rcu(old, rcu);
313 } 313 }
314 rtnl_unlock(); 314 rtnl_unlock();
315 } 315 }
316 316
317 module_init(init_cgroup_netprio); 317 module_init(init_cgroup_netprio);
318 module_exit(exit_cgroup_netprio); 318 module_exit(exit_cgroup_netprio);
319 MODULE_LICENSE("GPL v2"); 319 MODULE_LICENSE("GPL v2");
320 320
net/ipv4/ip_sockglue.c
1 /* 1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX 2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket 3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level. 4 * interface as the means of communication with the user level.
5 * 5 *
6 * The IP to API glue. 6 * The IP to API glue.
7 * 7 *
8 * Authors: see ip.c 8 * Authors: see ip.c
9 * 9 *
10 * Fixes: 10 * Fixes:
11 * Many : Split from ip.c , see ip.c for history. 11 * Many : Split from ip.c , see ip.c for history.
12 * Martin Mares : TOS setting fixed. 12 * Martin Mares : TOS setting fixed.
13 * Alan Cox : Fixed a couple of oopses in Martin's 13 * Alan Cox : Fixed a couple of oopses in Martin's
14 * TOS tweaks. 14 * TOS tweaks.
15 * Mike McLagan : Routing by source 15 * Mike McLagan : Routing by source
16 */ 16 */
17 17
18 #include <linux/module.h> 18 #include <linux/module.h>
19 #include <linux/types.h> 19 #include <linux/types.h>
20 #include <linux/mm.h> 20 #include <linux/mm.h>
21 #include <linux/skbuff.h> 21 #include <linux/skbuff.h>
22 #include <linux/ip.h> 22 #include <linux/ip.h>
23 #include <linux/icmp.h> 23 #include <linux/icmp.h>
24 #include <linux/inetdevice.h> 24 #include <linux/inetdevice.h>
25 #include <linux/netdevice.h> 25 #include <linux/netdevice.h>
26 #include <linux/slab.h> 26 #include <linux/slab.h>
27 #include <net/sock.h> 27 #include <net/sock.h>
28 #include <net/ip.h> 28 #include <net/ip.h>
29 #include <net/icmp.h> 29 #include <net/icmp.h>
30 #include <net/tcp_states.h> 30 #include <net/tcp_states.h>
31 #include <linux/udp.h> 31 #include <linux/udp.h>
32 #include <linux/igmp.h> 32 #include <linux/igmp.h>
33 #include <linux/netfilter.h> 33 #include <linux/netfilter.h>
34 #include <linux/route.h> 34 #include <linux/route.h>
35 #include <linux/mroute.h> 35 #include <linux/mroute.h>
36 #include <net/inet_ecn.h> 36 #include <net/inet_ecn.h>
37 #include <net/route.h> 37 #include <net/route.h>
38 #include <net/xfrm.h> 38 #include <net/xfrm.h>
39 #include <net/compat.h> 39 #include <net/compat.h>
40 #if IS_ENABLED(CONFIG_IPV6) 40 #if IS_ENABLED(CONFIG_IPV6)
41 #include <net/transp_v6.h> 41 #include <net/transp_v6.h>
42 #endif 42 #endif
43 #include <net/ip_fib.h> 43 #include <net/ip_fib.h>
44 44
45 #include <linux/errqueue.h> 45 #include <linux/errqueue.h>
46 #include <asm/uaccess.h> 46 #include <asm/uaccess.h>
47 47
48 #define IP_CMSG_PKTINFO 1 48 #define IP_CMSG_PKTINFO 1
49 #define IP_CMSG_TTL 2 49 #define IP_CMSG_TTL 2
50 #define IP_CMSG_TOS 4 50 #define IP_CMSG_TOS 4
51 #define IP_CMSG_RECVOPTS 8 51 #define IP_CMSG_RECVOPTS 8
52 #define IP_CMSG_RETOPTS 16 52 #define IP_CMSG_RETOPTS 16
53 #define IP_CMSG_PASSSEC 32 53 #define IP_CMSG_PASSSEC 32
54 #define IP_CMSG_ORIGDSTADDR 64 54 #define IP_CMSG_ORIGDSTADDR 64
55 55
56 /* 56 /*
57 * SOL_IP control messages. 57 * SOL_IP control messages.
58 */ 58 */
59 #define PKTINFO_SKB_CB(__skb) ((struct in_pktinfo *)((__skb)->cb)) 59 #define PKTINFO_SKB_CB(__skb) ((struct in_pktinfo *)((__skb)->cb))
60 60
61 static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) 61 static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
62 { 62 {
63 struct in_pktinfo info = *PKTINFO_SKB_CB(skb); 63 struct in_pktinfo info = *PKTINFO_SKB_CB(skb);
64 64
65 info.ipi_addr.s_addr = ip_hdr(skb)->daddr; 65 info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
66 66
67 put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); 67 put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
68 } 68 }
69 69
70 static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb) 70 static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
71 { 71 {
72 int ttl = ip_hdr(skb)->ttl; 72 int ttl = ip_hdr(skb)->ttl;
73 put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl); 73 put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
74 } 74 }
75 75
76 static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb) 76 static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
77 { 77 {
78 put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos); 78 put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos);
79 } 79 }
80 80
81 static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) 81 static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
82 { 82 {
83 if (IPCB(skb)->opt.optlen == 0) 83 if (IPCB(skb)->opt.optlen == 0)
84 return; 84 return;
85 85
86 put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen, 86 put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen,
87 ip_hdr(skb) + 1); 87 ip_hdr(skb) + 1);
88 } 88 }
89 89
90 90
91 static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) 91 static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
92 { 92 {
93 unsigned char optbuf[sizeof(struct ip_options) + 40]; 93 unsigned char optbuf[sizeof(struct ip_options) + 40];
94 struct ip_options *opt = (struct ip_options *)optbuf; 94 struct ip_options *opt = (struct ip_options *)optbuf;
95 95
96 if (IPCB(skb)->opt.optlen == 0) 96 if (IPCB(skb)->opt.optlen == 0)
97 return; 97 return;
98 98
99 if (ip_options_echo(opt, skb)) { 99 if (ip_options_echo(opt, skb)) {
100 msg->msg_flags |= MSG_CTRUNC; 100 msg->msg_flags |= MSG_CTRUNC;
101 return; 101 return;
102 } 102 }
103 ip_options_undo(opt); 103 ip_options_undo(opt);
104 104
105 put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data); 105 put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
106 } 106 }
107 107
108 static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) 108 static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
109 { 109 {
110 char *secdata; 110 char *secdata;
111 u32 seclen, secid; 111 u32 seclen, secid;
112 int err; 112 int err;
113 113
114 err = security_socket_getpeersec_dgram(NULL, skb, &secid); 114 err = security_socket_getpeersec_dgram(NULL, skb, &secid);
115 if (err) 115 if (err)
116 return; 116 return;
117 117
118 err = security_secid_to_secctx(secid, &secdata, &seclen); 118 err = security_secid_to_secctx(secid, &secdata, &seclen);
119 if (err) 119 if (err)
120 return; 120 return;
121 121
122 put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata); 122 put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata);
123 security_release_secctx(secdata, seclen); 123 security_release_secctx(secdata, seclen);
124 } 124 }
125 125
126 static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) 126 static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
127 { 127 {
128 struct sockaddr_in sin; 128 struct sockaddr_in sin;
129 const struct iphdr *iph = ip_hdr(skb); 129 const struct iphdr *iph = ip_hdr(skb);
130 __be16 *ports = (__be16 *)skb_transport_header(skb); 130 __be16 *ports = (__be16 *)skb_transport_header(skb);
131 131
132 if (skb_transport_offset(skb) + 4 > skb->len) 132 if (skb_transport_offset(skb) + 4 > skb->len)
133 return; 133 return;
134 134
135 /* All current transport protocols have the port numbers in the 135 /* All current transport protocols have the port numbers in the
136 * first four bytes of the transport header and this function is 136 * first four bytes of the transport header and this function is
137 * written with this assumption in mind. 137 * written with this assumption in mind.
138 */ 138 */
139 139
140 sin.sin_family = AF_INET; 140 sin.sin_family = AF_INET;
141 sin.sin_addr.s_addr = iph->daddr; 141 sin.sin_addr.s_addr = iph->daddr;
142 sin.sin_port = ports[1]; 142 sin.sin_port = ports[1];
143 memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); 143 memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
144 144
145 put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin); 145 put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin);
146 } 146 }
147 147
148 void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) 148 void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
149 { 149 {
150 struct inet_sock *inet = inet_sk(skb->sk); 150 struct inet_sock *inet = inet_sk(skb->sk);
151 unsigned int flags = inet->cmsg_flags; 151 unsigned int flags = inet->cmsg_flags;
152 152
153 /* Ordered by supposed usage frequency */ 153 /* Ordered by supposed usage frequency */
154 if (flags & 1) 154 if (flags & 1)
155 ip_cmsg_recv_pktinfo(msg, skb); 155 ip_cmsg_recv_pktinfo(msg, skb);
156 if ((flags >>= 1) == 0) 156 if ((flags >>= 1) == 0)
157 return; 157 return;
158 158
159 if (flags & 1) 159 if (flags & 1)
160 ip_cmsg_recv_ttl(msg, skb); 160 ip_cmsg_recv_ttl(msg, skb);
161 if ((flags >>= 1) == 0) 161 if ((flags >>= 1) == 0)
162 return; 162 return;
163 163
164 if (flags & 1) 164 if (flags & 1)
165 ip_cmsg_recv_tos(msg, skb); 165 ip_cmsg_recv_tos(msg, skb);
166 if ((flags >>= 1) == 0) 166 if ((flags >>= 1) == 0)
167 return; 167 return;
168 168
169 if (flags & 1) 169 if (flags & 1)
170 ip_cmsg_recv_opts(msg, skb); 170 ip_cmsg_recv_opts(msg, skb);
171 if ((flags >>= 1) == 0) 171 if ((flags >>= 1) == 0)
172 return; 172 return;
173 173
174 if (flags & 1) 174 if (flags & 1)
175 ip_cmsg_recv_retopts(msg, skb); 175 ip_cmsg_recv_retopts(msg, skb);
176 if ((flags >>= 1) == 0) 176 if ((flags >>= 1) == 0)
177 return; 177 return;
178 178
179 if (flags & 1) 179 if (flags & 1)
180 ip_cmsg_recv_security(msg, skb); 180 ip_cmsg_recv_security(msg, skb);
181 181
182 if ((flags >>= 1) == 0) 182 if ((flags >>= 1) == 0)
183 return; 183 return;
184 if (flags & 1) 184 if (flags & 1)
185 ip_cmsg_recv_dstaddr(msg, skb); 185 ip_cmsg_recv_dstaddr(msg, skb);
186 186
187 } 187 }
188 EXPORT_SYMBOL(ip_cmsg_recv); 188 EXPORT_SYMBOL(ip_cmsg_recv);
189 189
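ip_cmsg_recv() above is the kernel half of per-packet ancillary data; the user-space half enables the wanted flags with setsockopt() and walks the control buffer returned by recvmsg(). A minimal sketch for IP_PKTINFO on a bound UDP socket (the helper name and buffer sizes are illustrative, not from this file):

#define _GNU_SOURCE	/* for struct in_pktinfo on older glibc */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* Receive one datagram and print the ifindex and destination address that
 * ip_cmsg_recv_pktinfo() placed in the IP_PKTINFO control message.
 */
static int recv_with_pktinfo(int fd)
{
	char data[1500], cbuf[CMSG_SPACE(sizeof(struct in_pktinfo))];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg;
	int one = 1;

	if (setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &one, sizeof(one)) < 0)
		return -1;
	if (recvmsg(fd, &msg, 0) < 0)
		return -1;

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level == IPPROTO_IP && cmsg->cmsg_type == IP_PKTINFO) {
			struct in_pktinfo info;

			memcpy(&info, CMSG_DATA(cmsg), sizeof(info));
			printf("ifindex %d dst %s\n", info.ipi_ifindex,
			       inet_ntoa(info.ipi_addr));
		}
	}
	return 0;
}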
190 int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) 190 int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
191 { 191 {
192 int err, val; 192 int err, val;
193 struct cmsghdr *cmsg; 193 struct cmsghdr *cmsg;
194 194
195 for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { 195 for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
196 if (!CMSG_OK(msg, cmsg)) 196 if (!CMSG_OK(msg, cmsg))
197 return -EINVAL; 197 return -EINVAL;
198 if (cmsg->cmsg_level != SOL_IP) 198 if (cmsg->cmsg_level != SOL_IP)
199 continue; 199 continue;
200 switch (cmsg->cmsg_type) { 200 switch (cmsg->cmsg_type) {
201 case IP_RETOPTS: 201 case IP_RETOPTS:
202 err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); 202 err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
203 err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), 203 err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
204 err < 40 ? err : 40); 204 err < 40 ? err : 40);
205 if (err) 205 if (err)
206 return err; 206 return err;
207 break; 207 break;
208 case IP_PKTINFO: 208 case IP_PKTINFO:
209 { 209 {
210 struct in_pktinfo *info; 210 struct in_pktinfo *info;
211 if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo))) 211 if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
212 return -EINVAL; 212 return -EINVAL;
213 info = (struct in_pktinfo *)CMSG_DATA(cmsg); 213 info = (struct in_pktinfo *)CMSG_DATA(cmsg);
214 ipc->oif = info->ipi_ifindex; 214 ipc->oif = info->ipi_ifindex;
215 ipc->addr = info->ipi_spec_dst.s_addr; 215 ipc->addr = info->ipi_spec_dst.s_addr;
216 break; 216 break;
217 } 217 }
218 case IP_TTL: 218 case IP_TTL:
219 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) 219 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
220 return -EINVAL; 220 return -EINVAL;
221 val = *(int *)CMSG_DATA(cmsg); 221 val = *(int *)CMSG_DATA(cmsg);
222 if (val < 1 || val > 255) 222 if (val < 1 || val > 255)
223 return -EINVAL; 223 return -EINVAL;
224 ipc->ttl = val; 224 ipc->ttl = val;
225 break; 225 break;
226 case IP_TOS: 226 case IP_TOS:
227 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) 227 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
228 return -EINVAL; 228 return -EINVAL;
229 val = *(int *)CMSG_DATA(cmsg); 229 val = *(int *)CMSG_DATA(cmsg);
230 if (val < 0 || val > 255) 230 if (val < 0 || val > 255)
231 return -EINVAL; 231 return -EINVAL;
232 ipc->tos = val; 232 ipc->tos = val;
233 ipc->priority = rt_tos2priority(ipc->tos); 233 ipc->priority = rt_tos2priority(ipc->tos);
234 break; 234 break;
235 235
236 default: 236 default:
237 return -EINVAL; 237 return -EINVAL;
238 } 238 }
239 } 239 }
240 return 0; 240 return 0;
241 } 241 }
242 242
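Going the other way, ip_cmsg_send() above accepts IP_RETOPTS, IP_PKTINFO, IP_TTL and IP_TOS control messages on sendmsg(); the TTL/TOS variants expect an int payload in the ranges checked above. A hedged sketch sending one UDP datagram with a per-packet TOS (the helper name is illustrative):

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>

/* Send a short datagram to dst with TOS 0x10 attached as an IP_TOS
 * control message, which ip_cmsg_send() copies into ipc->tos.
 */
static int send_with_tos(int fd, const struct sockaddr_in *dst)
{
	char data[] = "hi";
	char cbuf[CMSG_SPACE(sizeof(int))];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) - 1 };
	struct msghdr msg = {
		.msg_name = (void *)dst, .msg_namelen = sizeof(*dst),
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg;
	int tos = 0x10;

	memset(cbuf, 0, sizeof(cbuf));
	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = IPPROTO_IP;
	cmsg->cmsg_type = IP_TOS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(tos));
	memcpy(CMSG_DATA(cmsg), &tos, sizeof(tos));

	return sendmsg(fd, &msg, 0) < 0 ? -1 : 0;
}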
243 243
244 /* Special input handler for packets caught by router alert option. 244 /* Special input handler for packets caught by router alert option.
245 They are selected only by the protocol field, and then processed like 245 They are selected only by the protocol field, and then processed like
246 local ones; but only if someone wants them! Otherwise, a router 246 local ones; but only if someone wants them! Otherwise, a router
247 not running rsvpd will kill RSVP. 247 not running rsvpd will kill RSVP.
248 248
249 What user space does with them is a user-level problem. 249 What user space does with them is a user-level problem.
250 I have no idea how it will masquerade or NAT them (it is a joke, joke :-)), 250 I have no idea how it will masquerade or NAT them (it is a joke, joke :-)),
251 but the receiver should be clever enough, e.g., to forward mtrace requests 251 but the receiver should be clever enough, e.g., to forward mtrace requests
252 sent to a multicast group, to reach the destination's designated router. 252 sent to a multicast group, to reach the destination's designated router.
253 */ 253 */
254 struct ip_ra_chain __rcu *ip_ra_chain; 254 struct ip_ra_chain __rcu *ip_ra_chain;
255 static DEFINE_SPINLOCK(ip_ra_lock); 255 static DEFINE_SPINLOCK(ip_ra_lock);
256 256
257 257
258 static void ip_ra_destroy_rcu(struct rcu_head *head) 258 static void ip_ra_destroy_rcu(struct rcu_head *head)
259 { 259 {
260 struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu); 260 struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
261 261
262 sock_put(ra->saved_sk); 262 sock_put(ra->saved_sk);
263 kfree(ra); 263 kfree(ra);
264 } 264 }
265 265
266 int ip_ra_control(struct sock *sk, unsigned char on, 266 int ip_ra_control(struct sock *sk, unsigned char on,
267 void (*destructor)(struct sock *)) 267 void (*destructor)(struct sock *))
268 { 268 {
269 struct ip_ra_chain *ra, *new_ra; 269 struct ip_ra_chain *ra, *new_ra;
270 struct ip_ra_chain __rcu **rap; 270 struct ip_ra_chain __rcu **rap;
271 271
272 if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW) 272 if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
273 return -EINVAL; 273 return -EINVAL;
274 274
275 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; 275 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
276 276
277 spin_lock_bh(&ip_ra_lock); 277 spin_lock_bh(&ip_ra_lock);
278 for (rap = &ip_ra_chain; 278 for (rap = &ip_ra_chain;
279 (ra = rcu_dereference_protected(*rap, 279 (ra = rcu_dereference_protected(*rap,
280 lockdep_is_held(&ip_ra_lock))) != NULL; 280 lockdep_is_held(&ip_ra_lock))) != NULL;
281 rap = &ra->next) { 281 rap = &ra->next) {
282 if (ra->sk == sk) { 282 if (ra->sk == sk) {
283 if (on) { 283 if (on) {
284 spin_unlock_bh(&ip_ra_lock); 284 spin_unlock_bh(&ip_ra_lock);
285 kfree(new_ra); 285 kfree(new_ra);
286 return -EADDRINUSE; 286 return -EADDRINUSE;
287 } 287 }
288 /* don't let ip_call_ra_chain() use sk again */ 288 /* don't let ip_call_ra_chain() use sk again */
289 ra->sk = NULL; 289 ra->sk = NULL;
290 rcu_assign_pointer(*rap, ra->next); 290 rcu_assign_pointer(*rap, ra->next);
291 spin_unlock_bh(&ip_ra_lock); 291 spin_unlock_bh(&ip_ra_lock);
292 292
293 if (ra->destructor) 293 if (ra->destructor)
294 ra->destructor(sk); 294 ra->destructor(sk);
295 /* 295 /*
296 * Delay sock_put(sk) and kfree(ra) after one rcu grace 296 * Delay sock_put(sk) and kfree(ra) after one rcu grace
297 * period. This guarantees ip_call_ra_chain() doesn't need 297 * period. This guarantees ip_call_ra_chain() doesn't need
298 * to mess with socket refcounts. 298 * to mess with socket refcounts.
299 */ 299 */
300 ra->saved_sk = sk; 300 ra->saved_sk = sk;
301 call_rcu(&ra->rcu, ip_ra_destroy_rcu); 301 call_rcu(&ra->rcu, ip_ra_destroy_rcu);
302 return 0; 302 return 0;
303 } 303 }
304 } 304 }
305 if (new_ra == NULL) { 305 if (new_ra == NULL) {
306 spin_unlock_bh(&ip_ra_lock); 306 spin_unlock_bh(&ip_ra_lock);
307 return -ENOBUFS; 307 return -ENOBUFS;
308 } 308 }
309 new_ra->sk = sk; 309 new_ra->sk = sk;
310 new_ra->destructor = destructor; 310 new_ra->destructor = destructor;
311 311
312 new_ra->next = ra; 312 new_ra->next = ra;
313 rcu_assign_pointer(*rap, new_ra); 313 rcu_assign_pointer(*rap, new_ra);
314 sock_hold(sk); 314 sock_hold(sk);
315 spin_unlock_bh(&ip_ra_lock); 315 spin_unlock_bh(&ip_ra_lock);
316 316
317 return 0; 317 return 0;
318 } 318 }
319 319
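ip_ra_control() above is reached through the IP_ROUTER_ALERT socket option and, per the check at its top, only from raw sockets whose protocol is not IPPROTO_RAW. A sketch of how a routing daemon might register for Router Alert packets (protocol choice and privileges are up to the caller; illustrative, not from the patch):

#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>

/* Ask the kernel to divert Router Alert packets of the given protocol
 * (e.g. IPPROTO_RSVP) to this raw socket; requires CAP_NET_RAW.
 */
static int register_router_alert(int protocol)
{
	int fd = socket(AF_INET, SOCK_RAW, protocol);
	int on = 1;

	if (fd < 0)
		return -1;
	if (setsockopt(fd, IPPROTO_IP, IP_ROUTER_ALERT, &on, sizeof(on)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}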
320 void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, 320 void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
321 __be16 port, u32 info, u8 *payload) 321 __be16 port, u32 info, u8 *payload)
322 { 322 {
323 struct sock_exterr_skb *serr; 323 struct sock_exterr_skb *serr;
324 324
325 skb = skb_clone(skb, GFP_ATOMIC); 325 skb = skb_clone(skb, GFP_ATOMIC);
326 if (!skb) 326 if (!skb)
327 return; 327 return;
328 328
329 serr = SKB_EXT_ERR(skb); 329 serr = SKB_EXT_ERR(skb);
330 serr->ee.ee_errno = err; 330 serr->ee.ee_errno = err;
331 serr->ee.ee_origin = SO_EE_ORIGIN_ICMP; 331 serr->ee.ee_origin = SO_EE_ORIGIN_ICMP;
332 serr->ee.ee_type = icmp_hdr(skb)->type; 332 serr->ee.ee_type = icmp_hdr(skb)->type;
333 serr->ee.ee_code = icmp_hdr(skb)->code; 333 serr->ee.ee_code = icmp_hdr(skb)->code;
334 serr->ee.ee_pad = 0; 334 serr->ee.ee_pad = 0;
335 serr->ee.ee_info = info; 335 serr->ee.ee_info = info;
336 serr->ee.ee_data = 0; 336 serr->ee.ee_data = 0;
337 serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) - 337 serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) -
338 skb_network_header(skb); 338 skb_network_header(skb);
339 serr->port = port; 339 serr->port = port;
340 340
341 if (skb_pull(skb, payload - skb->data) != NULL) { 341 if (skb_pull(skb, payload - skb->data) != NULL) {
342 skb_reset_transport_header(skb); 342 skb_reset_transport_header(skb);
343 if (sock_queue_err_skb(sk, skb) == 0) 343 if (sock_queue_err_skb(sk, skb) == 0)
344 return; 344 return;
345 } 345 }
346 kfree_skb(skb); 346 kfree_skb(skb);
347 } 347 }
348 348
349 void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info) 349 void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
350 { 350 {
351 struct inet_sock *inet = inet_sk(sk); 351 struct inet_sock *inet = inet_sk(sk);
352 struct sock_exterr_skb *serr; 352 struct sock_exterr_skb *serr;
353 struct iphdr *iph; 353 struct iphdr *iph;
354 struct sk_buff *skb; 354 struct sk_buff *skb;
355 355
356 if (!inet->recverr) 356 if (!inet->recverr)
357 return; 357 return;
358 358
359 skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC); 359 skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC);
360 if (!skb) 360 if (!skb)
361 return; 361 return;
362 362
363 skb_put(skb, sizeof(struct iphdr)); 363 skb_put(skb, sizeof(struct iphdr));
364 skb_reset_network_header(skb); 364 skb_reset_network_header(skb);
365 iph = ip_hdr(skb); 365 iph = ip_hdr(skb);
366 iph->daddr = daddr; 366 iph->daddr = daddr;
367 367
368 serr = SKB_EXT_ERR(skb); 368 serr = SKB_EXT_ERR(skb);
369 serr->ee.ee_errno = err; 369 serr->ee.ee_errno = err;
370 serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; 370 serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
371 serr->ee.ee_type = 0; 371 serr->ee.ee_type = 0;
372 serr->ee.ee_code = 0; 372 serr->ee.ee_code = 0;
373 serr->ee.ee_pad = 0; 373 serr->ee.ee_pad = 0;
374 serr->ee.ee_info = info; 374 serr->ee.ee_info = info;
375 serr->ee.ee_data = 0; 375 serr->ee.ee_data = 0;
376 serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb); 376 serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
377 serr->port = port; 377 serr->port = port;
378 378
379 __skb_pull(skb, skb_tail_pointer(skb) - skb->data); 379 __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
380 skb_reset_transport_header(skb); 380 skb_reset_transport_header(skb);
381 381
382 if (sock_queue_err_skb(sk, skb)) 382 if (sock_queue_err_skb(sk, skb))
383 kfree_skb(skb); 383 kfree_skb(skb);
384 } 384 }
385 385
386 /* 386 /*
387 * Handle MSG_ERRQUEUE 387 * Handle MSG_ERRQUEUE
388 */ 388 */
389 int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) 389 int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
390 { 390 {
391 struct sock_exterr_skb *serr; 391 struct sock_exterr_skb *serr;
392 struct sk_buff *skb, *skb2; 392 struct sk_buff *skb, *skb2;
393 struct sockaddr_in *sin; 393 struct sockaddr_in *sin;
394 struct { 394 struct {
395 struct sock_extended_err ee; 395 struct sock_extended_err ee;
396 struct sockaddr_in offender; 396 struct sockaddr_in offender;
397 } errhdr; 397 } errhdr;
398 int err; 398 int err;
399 int copied; 399 int copied;
400 400
401 err = -EAGAIN; 401 err = -EAGAIN;
402 skb = skb_dequeue(&sk->sk_error_queue); 402 skb = skb_dequeue(&sk->sk_error_queue);
403 if (skb == NULL) 403 if (skb == NULL)
404 goto out; 404 goto out;
405 405
406 copied = skb->len; 406 copied = skb->len;
407 if (copied > len) { 407 if (copied > len) {
408 msg->msg_flags |= MSG_TRUNC; 408 msg->msg_flags |= MSG_TRUNC;
409 copied = len; 409 copied = len;
410 } 410 }
411 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 411 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
412 if (err) 412 if (err)
413 goto out_free_skb; 413 goto out_free_skb;
414 414
415 sock_recv_timestamp(msg, sk, skb); 415 sock_recv_timestamp(msg, sk, skb);
416 416
417 serr = SKB_EXT_ERR(skb); 417 serr = SKB_EXT_ERR(skb);
418 418
419 sin = (struct sockaddr_in *)msg->msg_name; 419 sin = (struct sockaddr_in *)msg->msg_name;
420 if (sin) { 420 if (sin) {
421 sin->sin_family = AF_INET; 421 sin->sin_family = AF_INET;
422 sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + 422 sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
423 serr->addr_offset); 423 serr->addr_offset);
424 sin->sin_port = serr->port; 424 sin->sin_port = serr->port;
425 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); 425 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
426 *addr_len = sizeof(*sin); 426 *addr_len = sizeof(*sin);
427 } 427 }
428 428
429 memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); 429 memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
430 sin = &errhdr.offender; 430 sin = &errhdr.offender;
431 sin->sin_family = AF_UNSPEC; 431 sin->sin_family = AF_UNSPEC;
432 if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP) { 432 if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP) {
433 struct inet_sock *inet = inet_sk(sk); 433 struct inet_sock *inet = inet_sk(sk);
434 434
435 sin->sin_family = AF_INET; 435 sin->sin_family = AF_INET;
436 sin->sin_addr.s_addr = ip_hdr(skb)->saddr; 436 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
437 sin->sin_port = 0; 437 sin->sin_port = 0;
438 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); 438 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
439 if (inet->cmsg_flags) 439 if (inet->cmsg_flags)
440 ip_cmsg_recv(msg, skb); 440 ip_cmsg_recv(msg, skb);
441 } 441 }
442 442
443 put_cmsg(msg, SOL_IP, IP_RECVERR, sizeof(errhdr), &errhdr); 443 put_cmsg(msg, SOL_IP, IP_RECVERR, sizeof(errhdr), &errhdr);
444 444
445 /* Now we could try to dump offended packet options */ 445 /* Now we could try to dump offended packet options */
446 446
447 msg->msg_flags |= MSG_ERRQUEUE; 447 msg->msg_flags |= MSG_ERRQUEUE;
448 err = copied; 448 err = copied;
449 449
450 /* Reset and regenerate socket error */ 450 /* Reset and regenerate socket error */
451 spin_lock_bh(&sk->sk_error_queue.lock); 451 spin_lock_bh(&sk->sk_error_queue.lock);
452 sk->sk_err = 0; 452 sk->sk_err = 0;
453 skb2 = skb_peek(&sk->sk_error_queue); 453 skb2 = skb_peek(&sk->sk_error_queue);
454 if (skb2 != NULL) { 454 if (skb2 != NULL) {
455 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno; 455 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
456 spin_unlock_bh(&sk->sk_error_queue.lock); 456 spin_unlock_bh(&sk->sk_error_queue.lock);
457 sk->sk_error_report(sk); 457 sk->sk_error_report(sk);
458 } else 458 } else
459 spin_unlock_bh(&sk->sk_error_queue.lock); 459 spin_unlock_bh(&sk->sk_error_queue.lock);
460 460
461 out_free_skb: 461 out_free_skb:
462 kfree_skb(skb); 462 kfree_skb(skb);
463 out: 463 out:
464 return err; 464 return err;
465 } 465 }
466 466
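ip_recv_error() above is what services recvmsg(..., MSG_ERRQUEUE); user space opts in with IP_RECVERR and then reads a struct sock_extended_err from the IP_RECVERR control message. A minimal sketch (the helper name and buffer sizes are illustrative):

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/errqueue.h>

/* Drain one entry from the socket error queue and report the ICMP
 * type/code that ip_icmp_error() recorded, if that was the origin.
 */
static int read_icmp_error(int fd)
{
	char data[1500], cbuf[512];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg;
	int on = 1;

	if (setsockopt(fd, IPPROTO_IP, IP_RECVERR, &on, sizeof(on)) < 0)
		return -1;
	if (recvmsg(fd, &msg, MSG_ERRQUEUE) < 0)
		return -1;	/* EAGAIN when the error queue is empty */

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level == IPPROTO_IP && cmsg->cmsg_type == IP_RECVERR) {
			struct sock_extended_err ee;

			memcpy(&ee, CMSG_DATA(cmsg), sizeof(ee));
			if (ee.ee_origin == SO_EE_ORIGIN_ICMP)
				printf("icmp type %u code %u errno %u\n",
				       ee.ee_type, ee.ee_code, ee.ee_errno);
		}
	}
	return 0;
}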
467 467
468 /* 468 /*
469 * Socket option code for IP. This is the end of the line after any 469 * Socket option code for IP. This is the end of the line after any
470 * TCP,UDP etc options on an IP socket. 470 * TCP,UDP etc options on an IP socket.
471 */ 471 */
472 472
473 static int do_ip_setsockopt(struct sock *sk, int level, 473 static int do_ip_setsockopt(struct sock *sk, int level,
474 int optname, char __user *optval, unsigned int optlen) 474 int optname, char __user *optval, unsigned int optlen)
475 { 475 {
476 struct inet_sock *inet = inet_sk(sk); 476 struct inet_sock *inet = inet_sk(sk);
477 int val = 0, err; 477 int val = 0, err;
478 478
479 switch (optname) { 479 switch (optname) {
480 case IP_PKTINFO: 480 case IP_PKTINFO:
481 case IP_RECVTTL: 481 case IP_RECVTTL:
482 case IP_RECVOPTS: 482 case IP_RECVOPTS:
483 case IP_RECVTOS: 483 case IP_RECVTOS:
484 case IP_RETOPTS: 484 case IP_RETOPTS:
485 case IP_TOS: 485 case IP_TOS:
486 case IP_TTL: 486 case IP_TTL:
487 case IP_HDRINCL: 487 case IP_HDRINCL:
488 case IP_MTU_DISCOVER: 488 case IP_MTU_DISCOVER:
489 case IP_RECVERR: 489 case IP_RECVERR:
490 case IP_ROUTER_ALERT: 490 case IP_ROUTER_ALERT:
491 case IP_FREEBIND: 491 case IP_FREEBIND:
492 case IP_PASSSEC: 492 case IP_PASSSEC:
493 case IP_TRANSPARENT: 493 case IP_TRANSPARENT:
494 case IP_MINTTL: 494 case IP_MINTTL:
495 case IP_NODEFRAG: 495 case IP_NODEFRAG:
496 case IP_UNICAST_IF: 496 case IP_UNICAST_IF:
497 case IP_MULTICAST_TTL: 497 case IP_MULTICAST_TTL:
498 case IP_MULTICAST_ALL: 498 case IP_MULTICAST_ALL:
499 case IP_MULTICAST_LOOP: 499 case IP_MULTICAST_LOOP:
500 case IP_RECVORIGDSTADDR: 500 case IP_RECVORIGDSTADDR:
501 if (optlen >= sizeof(int)) { 501 if (optlen >= sizeof(int)) {
502 if (get_user(val, (int __user *) optval)) 502 if (get_user(val, (int __user *) optval))
503 return -EFAULT; 503 return -EFAULT;
504 } else if (optlen >= sizeof(char)) { 504 } else if (optlen >= sizeof(char)) {
505 unsigned char ucval; 505 unsigned char ucval;
506 506
507 if (get_user(ucval, (unsigned char __user *) optval)) 507 if (get_user(ucval, (unsigned char __user *) optval))
508 return -EFAULT; 508 return -EFAULT;
509 val = (int) ucval; 509 val = (int) ucval;
510 } 510 }
511 } 511 }
512 512
513 /* If optlen==0, it is equivalent to val == 0 */ 513 /* If optlen==0, it is equivalent to val == 0 */
514 514
515 if (ip_mroute_opt(optname)) 515 if (ip_mroute_opt(optname))
516 return ip_mroute_setsockopt(sk, optname, optval, optlen); 516 return ip_mroute_setsockopt(sk, optname, optval, optlen);
517 517
518 err = 0; 518 err = 0;
519 lock_sock(sk); 519 lock_sock(sk);
520 520
521 switch (optname) { 521 switch (optname) {
522 case IP_OPTIONS: 522 case IP_OPTIONS:
523 { 523 {
524 struct ip_options_rcu *old, *opt = NULL; 524 struct ip_options_rcu *old, *opt = NULL;
525 525
526 if (optlen > 40) 526 if (optlen > 40)
527 goto e_inval; 527 goto e_inval;
528 err = ip_options_get_from_user(sock_net(sk), &opt, 528 err = ip_options_get_from_user(sock_net(sk), &opt,
529 optval, optlen); 529 optval, optlen);
530 if (err) 530 if (err)
531 break; 531 break;
532 old = rcu_dereference_protected(inet->inet_opt, 532 old = rcu_dereference_protected(inet->inet_opt,
533 sock_owned_by_user(sk)); 533 sock_owned_by_user(sk));
534 if (inet->is_icsk) { 534 if (inet->is_icsk) {
535 struct inet_connection_sock *icsk = inet_csk(sk); 535 struct inet_connection_sock *icsk = inet_csk(sk);
536 #if IS_ENABLED(CONFIG_IPV6) 536 #if IS_ENABLED(CONFIG_IPV6)
537 if (sk->sk_family == PF_INET || 537 if (sk->sk_family == PF_INET ||
538 (!((1 << sk->sk_state) & 538 (!((1 << sk->sk_state) &
539 (TCPF_LISTEN | TCPF_CLOSE)) && 539 (TCPF_LISTEN | TCPF_CLOSE)) &&
540 inet->inet_daddr != LOOPBACK4_IPV6)) { 540 inet->inet_daddr != LOOPBACK4_IPV6)) {
541 #endif 541 #endif
542 if (old) 542 if (old)
543 icsk->icsk_ext_hdr_len -= old->opt.optlen; 543 icsk->icsk_ext_hdr_len -= old->opt.optlen;
544 if (opt) 544 if (opt)
545 icsk->icsk_ext_hdr_len += opt->opt.optlen; 545 icsk->icsk_ext_hdr_len += opt->opt.optlen;
546 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); 546 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
547 #if IS_ENABLED(CONFIG_IPV6) 547 #if IS_ENABLED(CONFIG_IPV6)
548 } 548 }
549 #endif 549 #endif
550 } 550 }
551 rcu_assign_pointer(inet->inet_opt, opt); 551 rcu_assign_pointer(inet->inet_opt, opt);
552 if (old) 552 if (old)
553 kfree_rcu(old, rcu); 553 kfree_rcu(old, rcu);
554 break; 554 break;
555 } 555 }
556 case IP_PKTINFO: 556 case IP_PKTINFO:
557 if (val) 557 if (val)
558 inet->cmsg_flags |= IP_CMSG_PKTINFO; 558 inet->cmsg_flags |= IP_CMSG_PKTINFO;
559 else 559 else
560 inet->cmsg_flags &= ~IP_CMSG_PKTINFO; 560 inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
561 break; 561 break;
562 case IP_RECVTTL: 562 case IP_RECVTTL:
563 if (val) 563 if (val)
564 inet->cmsg_flags |= IP_CMSG_TTL; 564 inet->cmsg_flags |= IP_CMSG_TTL;
565 else 565 else
566 inet->cmsg_flags &= ~IP_CMSG_TTL; 566 inet->cmsg_flags &= ~IP_CMSG_TTL;
567 break; 567 break;
568 case IP_RECVTOS: 568 case IP_RECVTOS:
569 if (val) 569 if (val)
570 inet->cmsg_flags |= IP_CMSG_TOS; 570 inet->cmsg_flags |= IP_CMSG_TOS;
571 else 571 else
572 inet->cmsg_flags &= ~IP_CMSG_TOS; 572 inet->cmsg_flags &= ~IP_CMSG_TOS;
573 break; 573 break;
574 case IP_RECVOPTS: 574 case IP_RECVOPTS:
575 if (val) 575 if (val)
576 inet->cmsg_flags |= IP_CMSG_RECVOPTS; 576 inet->cmsg_flags |= IP_CMSG_RECVOPTS;
577 else 577 else
578 inet->cmsg_flags &= ~IP_CMSG_RECVOPTS; 578 inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
579 break; 579 break;
580 case IP_RETOPTS: 580 case IP_RETOPTS:
581 if (val) 581 if (val)
582 inet->cmsg_flags |= IP_CMSG_RETOPTS; 582 inet->cmsg_flags |= IP_CMSG_RETOPTS;
583 else 583 else
584 inet->cmsg_flags &= ~IP_CMSG_RETOPTS; 584 inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
585 break; 585 break;
586 case IP_PASSSEC: 586 case IP_PASSSEC:
587 if (val) 587 if (val)
588 inet->cmsg_flags |= IP_CMSG_PASSSEC; 588 inet->cmsg_flags |= IP_CMSG_PASSSEC;
589 else 589 else
590 inet->cmsg_flags &= ~IP_CMSG_PASSSEC; 590 inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
591 break; 591 break;
592 case IP_RECVORIGDSTADDR: 592 case IP_RECVORIGDSTADDR:
593 if (val) 593 if (val)
594 inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR; 594 inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR;
595 else 595 else
596 inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR; 596 inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR;
597 break; 597 break;
598 case IP_TOS: /* This sets both TOS and Precedence */ 598 case IP_TOS: /* This sets both TOS and Precedence */
599 if (sk->sk_type == SOCK_STREAM) { 599 if (sk->sk_type == SOCK_STREAM) {
600 val &= ~INET_ECN_MASK; 600 val &= ~INET_ECN_MASK;
601 val |= inet->tos & INET_ECN_MASK; 601 val |= inet->tos & INET_ECN_MASK;
602 } 602 }
603 if (inet->tos != val) { 603 if (inet->tos != val) {
604 inet->tos = val; 604 inet->tos = val;
605 sk->sk_priority = rt_tos2priority(val); 605 sk->sk_priority = rt_tos2priority(val);
606 sk_dst_reset(sk); 606 sk_dst_reset(sk);
607 } 607 }
608 break; 608 break;
609 case IP_TTL: 609 case IP_TTL:
610 if (optlen < 1) 610 if (optlen < 1)
611 goto e_inval; 611 goto e_inval;
612 if (val != -1 && (val < 1 || val > 255)) 612 if (val != -1 && (val < 1 || val > 255))
613 goto e_inval; 613 goto e_inval;
614 inet->uc_ttl = val; 614 inet->uc_ttl = val;
615 break; 615 break;
616 case IP_HDRINCL: 616 case IP_HDRINCL:
617 if (sk->sk_type != SOCK_RAW) { 617 if (sk->sk_type != SOCK_RAW) {
618 err = -ENOPROTOOPT; 618 err = -ENOPROTOOPT;
619 break; 619 break;
620 } 620 }
621 inet->hdrincl = val ? 1 : 0; 621 inet->hdrincl = val ? 1 : 0;
622 break; 622 break;
623 case IP_NODEFRAG: 623 case IP_NODEFRAG:
624 if (sk->sk_type != SOCK_RAW) { 624 if (sk->sk_type != SOCK_RAW) {
625 err = -ENOPROTOOPT; 625 err = -ENOPROTOOPT;
626 break; 626 break;
627 } 627 }
628 inet->nodefrag = val ? 1 : 0; 628 inet->nodefrag = val ? 1 : 0;
629 break; 629 break;
630 case IP_MTU_DISCOVER: 630 case IP_MTU_DISCOVER:
631 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_INTERFACE) 631 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_INTERFACE)
632 goto e_inval; 632 goto e_inval;
633 inet->pmtudisc = val; 633 inet->pmtudisc = val;
634 break; 634 break;
635 case IP_RECVERR: 635 case IP_RECVERR:
636 inet->recverr = !!val; 636 inet->recverr = !!val;
637 if (!val) 637 if (!val)
638 skb_queue_purge(&sk->sk_error_queue); 638 skb_queue_purge(&sk->sk_error_queue);
639 break; 639 break;
640 case IP_MULTICAST_TTL: 640 case IP_MULTICAST_TTL:
641 if (sk->sk_type == SOCK_STREAM) 641 if (sk->sk_type == SOCK_STREAM)
642 goto e_inval; 642 goto e_inval;
643 if (optlen < 1) 643 if (optlen < 1)
644 goto e_inval; 644 goto e_inval;
645 if (val == -1) 645 if (val == -1)
646 val = 1; 646 val = 1;
647 if (val < 0 || val > 255) 647 if (val < 0 || val > 255)
648 goto e_inval; 648 goto e_inval;
649 inet->mc_ttl = val; 649 inet->mc_ttl = val;
650 break; 650 break;
651 case IP_MULTICAST_LOOP: 651 case IP_MULTICAST_LOOP:
652 if (optlen < 1) 652 if (optlen < 1)
653 goto e_inval; 653 goto e_inval;
654 inet->mc_loop = !!val; 654 inet->mc_loop = !!val;
655 break; 655 break;
656 case IP_UNICAST_IF: 656 case IP_UNICAST_IF:
657 { 657 {
658 struct net_device *dev = NULL; 658 struct net_device *dev = NULL;
659 int ifindex; 659 int ifindex;
660 660
661 if (optlen != sizeof(int)) 661 if (optlen != sizeof(int))
662 goto e_inval; 662 goto e_inval;
663 663
664 ifindex = (__force int)ntohl((__force __be32)val); 664 ifindex = (__force int)ntohl((__force __be32)val);
665 if (ifindex == 0) { 665 if (ifindex == 0) {
666 inet->uc_index = 0; 666 inet->uc_index = 0;
667 err = 0; 667 err = 0;
668 break; 668 break;
669 } 669 }
670 670
671 dev = dev_get_by_index(sock_net(sk), ifindex); 671 dev = dev_get_by_index(sock_net(sk), ifindex);
672 err = -EADDRNOTAVAIL; 672 err = -EADDRNOTAVAIL;
673 if (!dev) 673 if (!dev)
674 break; 674 break;
675 dev_put(dev); 675 dev_put(dev);
676 676
677 err = -EINVAL; 677 err = -EINVAL;
678 if (sk->sk_bound_dev_if) 678 if (sk->sk_bound_dev_if)
679 break; 679 break;
680 680
681 inet->uc_index = ifindex; 681 inet->uc_index = ifindex;
682 err = 0; 682 err = 0;
683 break; 683 break;
684 } 684 }
685 case IP_MULTICAST_IF: 685 case IP_MULTICAST_IF:
686 { 686 {
687 struct ip_mreqn mreq; 687 struct ip_mreqn mreq;
688 struct net_device *dev = NULL; 688 struct net_device *dev = NULL;
689 689
690 if (sk->sk_type == SOCK_STREAM) 690 if (sk->sk_type == SOCK_STREAM)
691 goto e_inval; 691 goto e_inval;
692 /* 692 /*
693 * Check the arguments are allowable 693 * Check the arguments are allowable
694 */ 694 */
695 695
696 if (optlen < sizeof(struct in_addr)) 696 if (optlen < sizeof(struct in_addr))
697 goto e_inval; 697 goto e_inval;
698 698
699 err = -EFAULT; 699 err = -EFAULT;
700 if (optlen >= sizeof(struct ip_mreqn)) { 700 if (optlen >= sizeof(struct ip_mreqn)) {
701 if (copy_from_user(&mreq, optval, sizeof(mreq))) 701 if (copy_from_user(&mreq, optval, sizeof(mreq)))
702 break; 702 break;
703 } else { 703 } else {
704 memset(&mreq, 0, sizeof(mreq)); 704 memset(&mreq, 0, sizeof(mreq));
705 if (optlen >= sizeof(struct ip_mreq)) { 705 if (optlen >= sizeof(struct ip_mreq)) {
706 if (copy_from_user(&mreq, optval, 706 if (copy_from_user(&mreq, optval,
707 sizeof(struct ip_mreq))) 707 sizeof(struct ip_mreq)))
708 break; 708 break;
709 } else if (optlen >= sizeof(struct in_addr)) { 709 } else if (optlen >= sizeof(struct in_addr)) {
710 if (copy_from_user(&mreq.imr_address, optval, 710 if (copy_from_user(&mreq.imr_address, optval,
711 sizeof(struct in_addr))) 711 sizeof(struct in_addr)))
712 break; 712 break;
713 } 713 }
714 } 714 }
715 715
716 if (!mreq.imr_ifindex) { 716 if (!mreq.imr_ifindex) {
717 if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) { 717 if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) {
718 inet->mc_index = 0; 718 inet->mc_index = 0;
719 inet->mc_addr = 0; 719 inet->mc_addr = 0;
720 err = 0; 720 err = 0;
721 break; 721 break;
722 } 722 }
723 dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr); 723 dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
724 if (dev) 724 if (dev)
725 mreq.imr_ifindex = dev->ifindex; 725 mreq.imr_ifindex = dev->ifindex;
726 } else 726 } else
727 dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex); 727 dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
728 728
729 729
730 err = -EADDRNOTAVAIL; 730 err = -EADDRNOTAVAIL;
731 if (!dev) 731 if (!dev)
732 break; 732 break;
733 dev_put(dev); 733 dev_put(dev);
734 734
735 err = -EINVAL; 735 err = -EINVAL;
736 if (sk->sk_bound_dev_if && 736 if (sk->sk_bound_dev_if &&
737 mreq.imr_ifindex != sk->sk_bound_dev_if) 737 mreq.imr_ifindex != sk->sk_bound_dev_if)
738 break; 738 break;
739 739
740 inet->mc_index = mreq.imr_ifindex; 740 inet->mc_index = mreq.imr_ifindex;
741 inet->mc_addr = mreq.imr_address.s_addr; 741 inet->mc_addr = mreq.imr_address.s_addr;
742 err = 0; 742 err = 0;
743 break; 743 break;
744 } 744 }
745 745
746 case IP_ADD_MEMBERSHIP: 746 case IP_ADD_MEMBERSHIP:
747 case IP_DROP_MEMBERSHIP: 747 case IP_DROP_MEMBERSHIP:
748 { 748 {
749 struct ip_mreqn mreq; 749 struct ip_mreqn mreq;
750 750
751 err = -EPROTO; 751 err = -EPROTO;
752 if (inet_sk(sk)->is_icsk) 752 if (inet_sk(sk)->is_icsk)
753 break; 753 break;
754 754
755 if (optlen < sizeof(struct ip_mreq)) 755 if (optlen < sizeof(struct ip_mreq))
756 goto e_inval; 756 goto e_inval;
757 err = -EFAULT; 757 err = -EFAULT;
758 if (optlen >= sizeof(struct ip_mreqn)) { 758 if (optlen >= sizeof(struct ip_mreqn)) {
759 if (copy_from_user(&mreq, optval, sizeof(mreq))) 759 if (copy_from_user(&mreq, optval, sizeof(mreq)))
760 break; 760 break;
761 } else { 761 } else {
762 memset(&mreq, 0, sizeof(mreq)); 762 memset(&mreq, 0, sizeof(mreq));
763 if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq))) 763 if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq)))
764 break; 764 break;
765 } 765 }
766 766
767 if (optname == IP_ADD_MEMBERSHIP) 767 if (optname == IP_ADD_MEMBERSHIP)
768 err = ip_mc_join_group(sk, &mreq); 768 err = ip_mc_join_group(sk, &mreq);
769 else 769 else
770 err = ip_mc_leave_group(sk, &mreq); 770 err = ip_mc_leave_group(sk, &mreq);
771 break; 771 break;
772 } 772 }
773 case IP_MSFILTER: 773 case IP_MSFILTER:
774 { 774 {
775 struct ip_msfilter *msf; 775 struct ip_msfilter *msf;
776 776
777 if (optlen < IP_MSFILTER_SIZE(0)) 777 if (optlen < IP_MSFILTER_SIZE(0))
778 goto e_inval; 778 goto e_inval;
779 if (optlen > sysctl_optmem_max) { 779 if (optlen > sysctl_optmem_max) {
780 err = -ENOBUFS; 780 err = -ENOBUFS;
781 break; 781 break;
782 } 782 }
783 msf = kmalloc(optlen, GFP_KERNEL); 783 msf = kmalloc(optlen, GFP_KERNEL);
784 if (!msf) { 784 if (!msf) {
785 err = -ENOBUFS; 785 err = -ENOBUFS;
786 break; 786 break;
787 } 787 }
788 err = -EFAULT; 788 err = -EFAULT;
789 if (copy_from_user(msf, optval, optlen)) { 789 if (copy_from_user(msf, optval, optlen)) {
790 kfree(msf); 790 kfree(msf);
791 break; 791 break;
792 } 792 }
793 /* numsrc >= (1G-4) overflow in 32 bits */ 793 /* numsrc >= (1G-4) overflow in 32 bits */
794 if (msf->imsf_numsrc >= 0x3ffffffcU || 794 if (msf->imsf_numsrc >= 0x3ffffffcU ||
795 msf->imsf_numsrc > sysctl_igmp_max_msf) { 795 msf->imsf_numsrc > sysctl_igmp_max_msf) {
796 kfree(msf); 796 kfree(msf);
797 err = -ENOBUFS; 797 err = -ENOBUFS;
798 break; 798 break;
799 } 799 }
800 if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) { 800 if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
801 kfree(msf); 801 kfree(msf);
802 err = -EINVAL; 802 err = -EINVAL;
803 break; 803 break;
804 } 804 }
805 err = ip_mc_msfilter(sk, msf, 0); 805 err = ip_mc_msfilter(sk, msf, 0);
806 kfree(msf); 806 kfree(msf);
807 break; 807 break;
808 } 808 }
809 case IP_BLOCK_SOURCE: 809 case IP_BLOCK_SOURCE:
810 case IP_UNBLOCK_SOURCE: 810 case IP_UNBLOCK_SOURCE:
811 case IP_ADD_SOURCE_MEMBERSHIP: 811 case IP_ADD_SOURCE_MEMBERSHIP:
812 case IP_DROP_SOURCE_MEMBERSHIP: 812 case IP_DROP_SOURCE_MEMBERSHIP:
813 { 813 {
814 struct ip_mreq_source mreqs; 814 struct ip_mreq_source mreqs;
815 int omode, add; 815 int omode, add;
816 816
817 if (optlen != sizeof(struct ip_mreq_source)) 817 if (optlen != sizeof(struct ip_mreq_source))
818 goto e_inval; 818 goto e_inval;
819 if (copy_from_user(&mreqs, optval, sizeof(mreqs))) { 819 if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
820 err = -EFAULT; 820 err = -EFAULT;
821 break; 821 break;
822 } 822 }
823 if (optname == IP_BLOCK_SOURCE) { 823 if (optname == IP_BLOCK_SOURCE) {
824 omode = MCAST_EXCLUDE; 824 omode = MCAST_EXCLUDE;
825 add = 1; 825 add = 1;
826 } else if (optname == IP_UNBLOCK_SOURCE) { 826 } else if (optname == IP_UNBLOCK_SOURCE) {
827 omode = MCAST_EXCLUDE; 827 omode = MCAST_EXCLUDE;
828 add = 0; 828 add = 0;
829 } else if (optname == IP_ADD_SOURCE_MEMBERSHIP) { 829 } else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
830 struct ip_mreqn mreq; 830 struct ip_mreqn mreq;
831 831
832 mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr; 832 mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
833 mreq.imr_address.s_addr = mreqs.imr_interface; 833 mreq.imr_address.s_addr = mreqs.imr_interface;
834 mreq.imr_ifindex = 0; 834 mreq.imr_ifindex = 0;
835 err = ip_mc_join_group(sk, &mreq); 835 err = ip_mc_join_group(sk, &mreq);
836 if (err && err != -EADDRINUSE) 836 if (err && err != -EADDRINUSE)
837 break; 837 break;
838 omode = MCAST_INCLUDE; 838 omode = MCAST_INCLUDE;
839 add = 1; 839 add = 1;
840 } else /* IP_DROP_SOURCE_MEMBERSHIP */ { 840 } else /* IP_DROP_SOURCE_MEMBERSHIP */ {
841 omode = MCAST_INCLUDE; 841 omode = MCAST_INCLUDE;
842 add = 0; 842 add = 0;
843 } 843 }
844 err = ip_mc_source(add, omode, sk, &mreqs, 0); 844 err = ip_mc_source(add, omode, sk, &mreqs, 0);
845 break; 845 break;
846 } 846 }
847 case MCAST_JOIN_GROUP: 847 case MCAST_JOIN_GROUP:
848 case MCAST_LEAVE_GROUP: 848 case MCAST_LEAVE_GROUP:
849 { 849 {
850 struct group_req greq; 850 struct group_req greq;
851 struct sockaddr_in *psin; 851 struct sockaddr_in *psin;
852 struct ip_mreqn mreq; 852 struct ip_mreqn mreq;
853 853
854 if (optlen < sizeof(struct group_req)) 854 if (optlen < sizeof(struct group_req))
855 goto e_inval; 855 goto e_inval;
856 err = -EFAULT; 856 err = -EFAULT;
857 if (copy_from_user(&greq, optval, sizeof(greq))) 857 if (copy_from_user(&greq, optval, sizeof(greq)))
858 break; 858 break;
859 psin = (struct sockaddr_in *)&greq.gr_group; 859 psin = (struct sockaddr_in *)&greq.gr_group;
860 if (psin->sin_family != AF_INET) 860 if (psin->sin_family != AF_INET)
861 goto e_inval; 861 goto e_inval;
862 memset(&mreq, 0, sizeof(mreq)); 862 memset(&mreq, 0, sizeof(mreq));
863 mreq.imr_multiaddr = psin->sin_addr; 863 mreq.imr_multiaddr = psin->sin_addr;
864 mreq.imr_ifindex = greq.gr_interface; 864 mreq.imr_ifindex = greq.gr_interface;
865 865
866 if (optname == MCAST_JOIN_GROUP) 866 if (optname == MCAST_JOIN_GROUP)
867 err = ip_mc_join_group(sk, &mreq); 867 err = ip_mc_join_group(sk, &mreq);
868 else 868 else
869 err = ip_mc_leave_group(sk, &mreq); 869 err = ip_mc_leave_group(sk, &mreq);
870 break; 870 break;
871 } 871 }
872 case MCAST_JOIN_SOURCE_GROUP: 872 case MCAST_JOIN_SOURCE_GROUP:
873 case MCAST_LEAVE_SOURCE_GROUP: 873 case MCAST_LEAVE_SOURCE_GROUP:
874 case MCAST_BLOCK_SOURCE: 874 case MCAST_BLOCK_SOURCE:
875 case MCAST_UNBLOCK_SOURCE: 875 case MCAST_UNBLOCK_SOURCE:
876 { 876 {
877 struct group_source_req greqs; 877 struct group_source_req greqs;
878 struct ip_mreq_source mreqs; 878 struct ip_mreq_source mreqs;
879 struct sockaddr_in *psin; 879 struct sockaddr_in *psin;
880 int omode, add; 880 int omode, add;
881 881
882 if (optlen != sizeof(struct group_source_req)) 882 if (optlen != sizeof(struct group_source_req))
883 goto e_inval; 883 goto e_inval;
884 if (copy_from_user(&greqs, optval, sizeof(greqs))) { 884 if (copy_from_user(&greqs, optval, sizeof(greqs))) {
885 err = -EFAULT; 885 err = -EFAULT;
886 break; 886 break;
887 } 887 }
888 if (greqs.gsr_group.ss_family != AF_INET || 888 if (greqs.gsr_group.ss_family != AF_INET ||
889 greqs.gsr_source.ss_family != AF_INET) { 889 greqs.gsr_source.ss_family != AF_INET) {
890 err = -EADDRNOTAVAIL; 890 err = -EADDRNOTAVAIL;
891 break; 891 break;
892 } 892 }
893 psin = (struct sockaddr_in *)&greqs.gsr_group; 893 psin = (struct sockaddr_in *)&greqs.gsr_group;
894 mreqs.imr_multiaddr = psin->sin_addr.s_addr; 894 mreqs.imr_multiaddr = psin->sin_addr.s_addr;
895 psin = (struct sockaddr_in *)&greqs.gsr_source; 895 psin = (struct sockaddr_in *)&greqs.gsr_source;
896 mreqs.imr_sourceaddr = psin->sin_addr.s_addr; 896 mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
897 mreqs.imr_interface = 0; /* use index for mc_source */ 897 mreqs.imr_interface = 0; /* use index for mc_source */
898 898
899 if (optname == MCAST_BLOCK_SOURCE) { 899 if (optname == MCAST_BLOCK_SOURCE) {
900 omode = MCAST_EXCLUDE; 900 omode = MCAST_EXCLUDE;
901 add = 1; 901 add = 1;
902 } else if (optname == MCAST_UNBLOCK_SOURCE) { 902 } else if (optname == MCAST_UNBLOCK_SOURCE) {
903 omode = MCAST_EXCLUDE; 903 omode = MCAST_EXCLUDE;
904 add = 0; 904 add = 0;
905 } else if (optname == MCAST_JOIN_SOURCE_GROUP) { 905 } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
906 struct ip_mreqn mreq; 906 struct ip_mreqn mreq;
907 907
908 psin = (struct sockaddr_in *)&greqs.gsr_group; 908 psin = (struct sockaddr_in *)&greqs.gsr_group;
909 mreq.imr_multiaddr = psin->sin_addr; 909 mreq.imr_multiaddr = psin->sin_addr;
910 mreq.imr_address.s_addr = 0; 910 mreq.imr_address.s_addr = 0;
911 mreq.imr_ifindex = greqs.gsr_interface; 911 mreq.imr_ifindex = greqs.gsr_interface;
912 err = ip_mc_join_group(sk, &mreq); 912 err = ip_mc_join_group(sk, &mreq);
913 if (err && err != -EADDRINUSE) 913 if (err && err != -EADDRINUSE)
914 break; 914 break;
915 greqs.gsr_interface = mreq.imr_ifindex; 915 greqs.gsr_interface = mreq.imr_ifindex;
916 omode = MCAST_INCLUDE; 916 omode = MCAST_INCLUDE;
917 add = 1; 917 add = 1;
918 } else /* MCAST_LEAVE_SOURCE_GROUP */ { 918 } else /* MCAST_LEAVE_SOURCE_GROUP */ {
919 omode = MCAST_INCLUDE; 919 omode = MCAST_INCLUDE;
920 add = 0; 920 add = 0;
921 } 921 }
922 err = ip_mc_source(add, omode, sk, &mreqs, 922 err = ip_mc_source(add, omode, sk, &mreqs,
923 greqs.gsr_interface); 923 greqs.gsr_interface);
924 break; 924 break;
925 } 925 }
926 case MCAST_MSFILTER: 926 case MCAST_MSFILTER:
927 { 927 {
928 struct sockaddr_in *psin; 928 struct sockaddr_in *psin;
929 struct ip_msfilter *msf = NULL; 929 struct ip_msfilter *msf = NULL;
930 struct group_filter *gsf = NULL; 930 struct group_filter *gsf = NULL;
931 int msize, i, ifindex; 931 int msize, i, ifindex;
932 932
933 if (optlen < GROUP_FILTER_SIZE(0)) 933 if (optlen < GROUP_FILTER_SIZE(0))
934 goto e_inval; 934 goto e_inval;
935 if (optlen > sysctl_optmem_max) { 935 if (optlen > sysctl_optmem_max) {
936 err = -ENOBUFS; 936 err = -ENOBUFS;
937 break; 937 break;
938 } 938 }
939 gsf = kmalloc(optlen, GFP_KERNEL); 939 gsf = kmalloc(optlen, GFP_KERNEL);
940 if (!gsf) { 940 if (!gsf) {
941 err = -ENOBUFS; 941 err = -ENOBUFS;
942 break; 942 break;
943 } 943 }
944 err = -EFAULT; 944 err = -EFAULT;
945 if (copy_from_user(gsf, optval, optlen)) 945 if (copy_from_user(gsf, optval, optlen))
946 goto mc_msf_out; 946 goto mc_msf_out;
947 947
948 /* numsrc >= (4G-140)/128 overflow in 32 bits */ 948 /* numsrc >= (4G-140)/128 overflow in 32 bits */
949 if (gsf->gf_numsrc >= 0x1ffffff || 949 if (gsf->gf_numsrc >= 0x1ffffff ||
950 gsf->gf_numsrc > sysctl_igmp_max_msf) { 950 gsf->gf_numsrc > sysctl_igmp_max_msf) {
951 err = -ENOBUFS; 951 err = -ENOBUFS;
952 goto mc_msf_out; 952 goto mc_msf_out;
953 } 953 }
954 if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) { 954 if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
955 err = -EINVAL; 955 err = -EINVAL;
956 goto mc_msf_out; 956 goto mc_msf_out;
957 } 957 }
958 msize = IP_MSFILTER_SIZE(gsf->gf_numsrc); 958 msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
959 msf = kmalloc(msize, GFP_KERNEL); 959 msf = kmalloc(msize, GFP_KERNEL);
960 if (!msf) { 960 if (!msf) {
961 err = -ENOBUFS; 961 err = -ENOBUFS;
962 goto mc_msf_out; 962 goto mc_msf_out;
963 } 963 }
964 ifindex = gsf->gf_interface; 964 ifindex = gsf->gf_interface;
965 psin = (struct sockaddr_in *)&gsf->gf_group; 965 psin = (struct sockaddr_in *)&gsf->gf_group;
966 if (psin->sin_family != AF_INET) { 966 if (psin->sin_family != AF_INET) {
967 err = -EADDRNOTAVAIL; 967 err = -EADDRNOTAVAIL;
968 goto mc_msf_out; 968 goto mc_msf_out;
969 } 969 }
970 msf->imsf_multiaddr = psin->sin_addr.s_addr; 970 msf->imsf_multiaddr = psin->sin_addr.s_addr;
971 msf->imsf_interface = 0; 971 msf->imsf_interface = 0;
972 msf->imsf_fmode = gsf->gf_fmode; 972 msf->imsf_fmode = gsf->gf_fmode;
973 msf->imsf_numsrc = gsf->gf_numsrc; 973 msf->imsf_numsrc = gsf->gf_numsrc;
974 err = -EADDRNOTAVAIL; 974 err = -EADDRNOTAVAIL;
975 for (i = 0; i < gsf->gf_numsrc; ++i) { 975 for (i = 0; i < gsf->gf_numsrc; ++i) {
976 psin = (struct sockaddr_in *)&gsf->gf_slist[i]; 976 psin = (struct sockaddr_in *)&gsf->gf_slist[i];
977 977
978 if (psin->sin_family != AF_INET) 978 if (psin->sin_family != AF_INET)
979 goto mc_msf_out; 979 goto mc_msf_out;
980 msf->imsf_slist[i] = psin->sin_addr.s_addr; 980 msf->imsf_slist[i] = psin->sin_addr.s_addr;
981 } 981 }
982 kfree(gsf); 982 kfree(gsf);
983 gsf = NULL; 983 gsf = NULL;
984 984
985 err = ip_mc_msfilter(sk, msf, ifindex); 985 err = ip_mc_msfilter(sk, msf, ifindex);
986 mc_msf_out: 986 mc_msf_out:
987 kfree(msf); 987 kfree(msf);
988 kfree(gsf); 988 kfree(gsf);
989 break; 989 break;
990 } 990 }
991 case IP_MULTICAST_ALL: 991 case IP_MULTICAST_ALL:
992 if (optlen < 1) 992 if (optlen < 1)
993 goto e_inval; 993 goto e_inval;
994 if (val != 0 && val != 1) 994 if (val != 0 && val != 1)
995 goto e_inval; 995 goto e_inval;
996 inet->mc_all = val; 996 inet->mc_all = val;
997 break; 997 break;
998 case IP_ROUTER_ALERT: 998 case IP_ROUTER_ALERT:
999 err = ip_ra_control(sk, val ? 1 : 0, NULL); 999 err = ip_ra_control(sk, val ? 1 : 0, NULL);
1000 break; 1000 break;
1001 1001
1002 case IP_FREEBIND: 1002 case IP_FREEBIND:
1003 if (optlen < 1) 1003 if (optlen < 1)
1004 goto e_inval; 1004 goto e_inval;
1005 inet->freebind = !!val; 1005 inet->freebind = !!val;
1006 break; 1006 break;
1007 1007
1008 case IP_IPSEC_POLICY: 1008 case IP_IPSEC_POLICY:
1009 case IP_XFRM_POLICY: 1009 case IP_XFRM_POLICY:
1010 err = -EPERM; 1010 err = -EPERM;
1011 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 1011 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1012 break; 1012 break;
1013 err = xfrm_user_policy(sk, optname, optval, optlen); 1013 err = xfrm_user_policy(sk, optname, optval, optlen);
1014 break; 1014 break;
1015 1015
1016 case IP_TRANSPARENT: 1016 case IP_TRANSPARENT:
1017 if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && 1017 if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
1018 !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { 1018 !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1019 err = -EPERM; 1019 err = -EPERM;
1020 break; 1020 break;
1021 } 1021 }
1022 if (optlen < 1) 1022 if (optlen < 1)
1023 goto e_inval; 1023 goto e_inval;
1024 inet->transparent = !!val; 1024 inet->transparent = !!val;
1025 break; 1025 break;
1026 1026
1027 case IP_MINTTL: 1027 case IP_MINTTL:
1028 if (optlen < 1) 1028 if (optlen < 1)
1029 goto e_inval; 1029 goto e_inval;
1030 if (val < 0 || val > 255) 1030 if (val < 0 || val > 255)
1031 goto e_inval; 1031 goto e_inval;
1032 inet->min_ttl = val; 1032 inet->min_ttl = val;
1033 break; 1033 break;
1034 1034
1035 default: 1035 default:
1036 err = -ENOPROTOOPT; 1036 err = -ENOPROTOOPT;
1037 break; 1037 break;
1038 } 1038 }
1039 release_sock(sk); 1039 release_sock(sk);
1040 return err; 1040 return err;
1041 1041
1042 e_inval: 1042 e_inval:
1043 release_sock(sk); 1043 release_sock(sk);
1044 return -EINVAL; 1044 return -EINVAL;
1045 } 1045 }
1046 1046
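do_ip_setsockopt() above is the single kernel entry point for every SOL_IP option; from user space these are ordinary setsockopt() calls on an IPv4 socket. A short sketch exercising a few of the cases handled above, on an already-created UDP socket (the multicast group address is an arbitrary example):

#define _GNU_SOURCE	/* for struct ip_mreqn on older glibc */
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* IP_TOS, IP_MTU_DISCOVER and IP_ADD_MEMBERSHIP as seen from user space. */
static int tune_ip_socket(int fd)
{
	int tos = 0x10;			/* handled by the IP_TOS case          */
	int pmtu = IP_PMTUDISC_DO;	/* handled by the IP_MTU_DISCOVER case */
	struct ip_mreqn mreq;		/* handled by IP_ADD_MEMBERSHIP        */

	memset(&mreq, 0, sizeof(mreq));
	mreq.imr_multiaddr.s_addr = inet_addr("239.1.2.3");
	mreq.imr_ifindex = 0;		/* let routing pick the interface */

	if (setsockopt(fd, IPPROTO_IP, IP_TOS, &tos, sizeof(tos)) < 0)
		return -1;
	if (setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &pmtu, sizeof(pmtu)) < 0)
		return -1;
	return setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
}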
1047 /** 1047 /**
1048 * ipv4_pktinfo_prepare - transfer some info from rtable to skb 1048 * ipv4_pktinfo_prepare - transfer some info from rtable to skb
1049 * @sk: socket 1049 * @sk: socket
1050 * @skb: buffer 1050 * @skb: buffer
1051 * 1051 *
1052 * To support IP_CMSG_PKTINFO option, we store rt_iif and specific 1052 * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
1053 * destination in skb->cb[] before dst drop. 1053 * destination in skb->cb[] before dst drop.
1054 * This way, receiver doesnt make cache line misses to read rtable. 1054 * This way, receiver doesn't make cache line misses to read rtable.
1055 */ 1055 */
1056 void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) 1056 void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
1057 { 1057 {
1058 struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); 1058 struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
1059 1059
1060 if ((inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) && 1060 if ((inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) &&
1061 skb_rtable(skb)) { 1061 skb_rtable(skb)) {
1062 pktinfo->ipi_ifindex = inet_iif(skb); 1062 pktinfo->ipi_ifindex = inet_iif(skb);
1063 pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); 1063 pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
1064 } else { 1064 } else {
1065 pktinfo->ipi_ifindex = 0; 1065 pktinfo->ipi_ifindex = 0;
1066 pktinfo->ipi_spec_dst.s_addr = 0; 1066 pktinfo->ipi_spec_dst.s_addr = 0;
1067 } 1067 }
1068 skb_dst_drop(skb); 1068 skb_dst_drop(skb);
1069 } 1069 }
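
For orientation, a hedged sketch of the user-space consumer this function prepares data for: a datagram receiver that enables IP_PKTINFO and pulls the interface index and specific destination out of the ancillary data. The helper name read_pktinfo and the buffer sizes are invented for the example.

#define _GNU_SOURCE		/* struct in_pktinfo on older glibc */
#include <stdio.h>
#include <string.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/uio.h>

static void read_pktinfo(int fd)	/* fd: a bound UDP socket */
{
	char data[1500], cbuf[CMSG_SPACE(sizeof(struct in_pktinfo))];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg;
	int on = 1;

	setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &on, sizeof(on));
	if (recvmsg(fd, &msg, 0) < 0)
		return;

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level == IPPROTO_IP &&
		    cmsg->cmsg_type == IP_PKTINFO) {
			struct in_pktinfo info;

			memcpy(&info, CMSG_DATA(cmsg), sizeof(info));
			printf("ifindex %d dst %x\n", info.ipi_ifindex,
			       (unsigned int)info.ipi_spec_dst.s_addr);
		}
	}
}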
1070 1070
1071 int ip_setsockopt(struct sock *sk, int level, 1071 int ip_setsockopt(struct sock *sk, int level,
1072 int optname, char __user *optval, unsigned int optlen) 1072 int optname, char __user *optval, unsigned int optlen)
1073 { 1073 {
1074 int err; 1074 int err;
1075 1075
1076 if (level != SOL_IP) 1076 if (level != SOL_IP)
1077 return -ENOPROTOOPT; 1077 return -ENOPROTOOPT;
1078 1078
1079 err = do_ip_setsockopt(sk, level, optname, optval, optlen); 1079 err = do_ip_setsockopt(sk, level, optname, optval, optlen);
1080 #ifdef CONFIG_NETFILTER 1080 #ifdef CONFIG_NETFILTER
1081 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1081 /* we need to exclude all possible ENOPROTOOPTs except default case */
1082 if (err == -ENOPROTOOPT && optname != IP_HDRINCL && 1082 if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
1083 optname != IP_IPSEC_POLICY && 1083 optname != IP_IPSEC_POLICY &&
1084 optname != IP_XFRM_POLICY && 1084 optname != IP_XFRM_POLICY &&
1085 !ip_mroute_opt(optname)) { 1085 !ip_mroute_opt(optname)) {
1086 lock_sock(sk); 1086 lock_sock(sk);
1087 err = nf_setsockopt(sk, PF_INET, optname, optval, optlen); 1087 err = nf_setsockopt(sk, PF_INET, optname, optval, optlen);
1088 release_sock(sk); 1088 release_sock(sk);
1089 } 1089 }
1090 #endif 1090 #endif
1091 return err; 1091 return err;
1092 } 1092 }
1093 EXPORT_SYMBOL(ip_setsockopt); 1093 EXPORT_SYMBOL(ip_setsockopt);
1094 1094
1095 #ifdef CONFIG_COMPAT 1095 #ifdef CONFIG_COMPAT
1096 int compat_ip_setsockopt(struct sock *sk, int level, int optname, 1096 int compat_ip_setsockopt(struct sock *sk, int level, int optname,
1097 char __user *optval, unsigned int optlen) 1097 char __user *optval, unsigned int optlen)
1098 { 1098 {
1099 int err; 1099 int err;
1100 1100
1101 if (level != SOL_IP) 1101 if (level != SOL_IP)
1102 return -ENOPROTOOPT; 1102 return -ENOPROTOOPT;
1103 1103
1104 if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER) 1104 if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER)
1105 return compat_mc_setsockopt(sk, level, optname, optval, optlen, 1105 return compat_mc_setsockopt(sk, level, optname, optval, optlen,
1106 ip_setsockopt); 1106 ip_setsockopt);
1107 1107
1108 err = do_ip_setsockopt(sk, level, optname, optval, optlen); 1108 err = do_ip_setsockopt(sk, level, optname, optval, optlen);
1109 #ifdef CONFIG_NETFILTER 1109 #ifdef CONFIG_NETFILTER
1110 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1110 /* we need to exclude all possible ENOPROTOOPTs except default case */
1111 if (err == -ENOPROTOOPT && optname != IP_HDRINCL && 1111 if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
1112 optname != IP_IPSEC_POLICY && 1112 optname != IP_IPSEC_POLICY &&
1113 optname != IP_XFRM_POLICY && 1113 optname != IP_XFRM_POLICY &&
1114 !ip_mroute_opt(optname)) { 1114 !ip_mroute_opt(optname)) {
1115 lock_sock(sk); 1115 lock_sock(sk);
1116 err = compat_nf_setsockopt(sk, PF_INET, optname, 1116 err = compat_nf_setsockopt(sk, PF_INET, optname,
1117 optval, optlen); 1117 optval, optlen);
1118 release_sock(sk); 1118 release_sock(sk);
1119 } 1119 }
1120 #endif 1120 #endif
1121 return err; 1121 return err;
1122 } 1122 }
1123 EXPORT_SYMBOL(compat_ip_setsockopt); 1123 EXPORT_SYMBOL(compat_ip_setsockopt);
1124 #endif 1124 #endif
1125 1125
1126 /* 1126 /*
1127 * Get the options. Note for future reference. The GET of IP options gets 1127 * Get the options. Note for future reference. The GET of IP options gets
1128 * the _received_ ones. The set sets the _sent_ ones. 1128 * the _received_ ones. The set sets the _sent_ ones.
1129 */ 1129 */
1130 1130
1131 static int do_ip_getsockopt(struct sock *sk, int level, int optname, 1131 static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1132 char __user *optval, int __user *optlen, unsigned int flags) 1132 char __user *optval, int __user *optlen, unsigned int flags)
1133 { 1133 {
1134 struct inet_sock *inet = inet_sk(sk); 1134 struct inet_sock *inet = inet_sk(sk);
1135 int val; 1135 int val;
1136 int len; 1136 int len;
1137 1137
1138 if (level != SOL_IP) 1138 if (level != SOL_IP)
1139 return -EOPNOTSUPP; 1139 return -EOPNOTSUPP;
1140 1140
1141 if (ip_mroute_opt(optname)) 1141 if (ip_mroute_opt(optname))
1142 return ip_mroute_getsockopt(sk, optname, optval, optlen); 1142 return ip_mroute_getsockopt(sk, optname, optval, optlen);
1143 1143
1144 if (get_user(len, optlen)) 1144 if (get_user(len, optlen))
1145 return -EFAULT; 1145 return -EFAULT;
1146 if (len < 0) 1146 if (len < 0)
1147 return -EINVAL; 1147 return -EINVAL;
1148 1148
1149 lock_sock(sk); 1149 lock_sock(sk);
1150 1150
1151 switch (optname) { 1151 switch (optname) {
1152 case IP_OPTIONS: 1152 case IP_OPTIONS:
1153 { 1153 {
1154 unsigned char optbuf[sizeof(struct ip_options)+40]; 1154 unsigned char optbuf[sizeof(struct ip_options)+40];
1155 struct ip_options *opt = (struct ip_options *)optbuf; 1155 struct ip_options *opt = (struct ip_options *)optbuf;
1156 struct ip_options_rcu *inet_opt; 1156 struct ip_options_rcu *inet_opt;
1157 1157
1158 inet_opt = rcu_dereference_protected(inet->inet_opt, 1158 inet_opt = rcu_dereference_protected(inet->inet_opt,
1159 sock_owned_by_user(sk)); 1159 sock_owned_by_user(sk));
1160 opt->optlen = 0; 1160 opt->optlen = 0;
1161 if (inet_opt) 1161 if (inet_opt)
1162 memcpy(optbuf, &inet_opt->opt, 1162 memcpy(optbuf, &inet_opt->opt,
1163 sizeof(struct ip_options) + 1163 sizeof(struct ip_options) +
1164 inet_opt->opt.optlen); 1164 inet_opt->opt.optlen);
1165 release_sock(sk); 1165 release_sock(sk);
1166 1166
1167 if (opt->optlen == 0) 1167 if (opt->optlen == 0)
1168 return put_user(0, optlen); 1168 return put_user(0, optlen);
1169 1169
1170 ip_options_undo(opt); 1170 ip_options_undo(opt);
1171 1171
1172 len = min_t(unsigned int, len, opt->optlen); 1172 len = min_t(unsigned int, len, opt->optlen);
1173 if (put_user(len, optlen)) 1173 if (put_user(len, optlen))
1174 return -EFAULT; 1174 return -EFAULT;
1175 if (copy_to_user(optval, opt->__data, len)) 1175 if (copy_to_user(optval, opt->__data, len))
1176 return -EFAULT; 1176 return -EFAULT;
1177 return 0; 1177 return 0;
1178 } 1178 }
1179 case IP_PKTINFO: 1179 case IP_PKTINFO:
1180 val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0; 1180 val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
1181 break; 1181 break;
1182 case IP_RECVTTL: 1182 case IP_RECVTTL:
1183 val = (inet->cmsg_flags & IP_CMSG_TTL) != 0; 1183 val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
1184 break; 1184 break;
1185 case IP_RECVTOS: 1185 case IP_RECVTOS:
1186 val = (inet->cmsg_flags & IP_CMSG_TOS) != 0; 1186 val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
1187 break; 1187 break;
1188 case IP_RECVOPTS: 1188 case IP_RECVOPTS:
1189 val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0; 1189 val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
1190 break; 1190 break;
1191 case IP_RETOPTS: 1191 case IP_RETOPTS:
1192 val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0; 1192 val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
1193 break; 1193 break;
1194 case IP_PASSSEC: 1194 case IP_PASSSEC:
1195 val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0; 1195 val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
1196 break; 1196 break;
1197 case IP_RECVORIGDSTADDR: 1197 case IP_RECVORIGDSTADDR:
1198 val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0; 1198 val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0;
1199 break; 1199 break;
1200 case IP_TOS: 1200 case IP_TOS:
1201 val = inet->tos; 1201 val = inet->tos;
1202 break; 1202 break;
1203 case IP_TTL: 1203 case IP_TTL:
1204 val = (inet->uc_ttl == -1 ? 1204 val = (inet->uc_ttl == -1 ?
1205 sysctl_ip_default_ttl : 1205 sysctl_ip_default_ttl :
1206 inet->uc_ttl); 1206 inet->uc_ttl);
1207 break; 1207 break;
1208 case IP_HDRINCL: 1208 case IP_HDRINCL:
1209 val = inet->hdrincl; 1209 val = inet->hdrincl;
1210 break; 1210 break;
1211 case IP_NODEFRAG: 1211 case IP_NODEFRAG:
1212 val = inet->nodefrag; 1212 val = inet->nodefrag;
1213 break; 1213 break;
1214 case IP_MTU_DISCOVER: 1214 case IP_MTU_DISCOVER:
1215 val = inet->pmtudisc; 1215 val = inet->pmtudisc;
1216 break; 1216 break;
1217 case IP_MTU: 1217 case IP_MTU:
1218 { 1218 {
1219 struct dst_entry *dst; 1219 struct dst_entry *dst;
1220 val = 0; 1220 val = 0;
1221 dst = sk_dst_get(sk); 1221 dst = sk_dst_get(sk);
1222 if (dst) { 1222 if (dst) {
1223 val = dst_mtu(dst); 1223 val = dst_mtu(dst);
1224 dst_release(dst); 1224 dst_release(dst);
1225 } 1225 }
1226 if (!val) { 1226 if (!val) {
1227 release_sock(sk); 1227 release_sock(sk);
1228 return -ENOTCONN; 1228 return -ENOTCONN;
1229 } 1229 }
1230 break; 1230 break;
1231 } 1231 }
1232 case IP_RECVERR: 1232 case IP_RECVERR:
1233 val = inet->recverr; 1233 val = inet->recverr;
1234 break; 1234 break;
1235 case IP_MULTICAST_TTL: 1235 case IP_MULTICAST_TTL:
1236 val = inet->mc_ttl; 1236 val = inet->mc_ttl;
1237 break; 1237 break;
1238 case IP_MULTICAST_LOOP: 1238 case IP_MULTICAST_LOOP:
1239 val = inet->mc_loop; 1239 val = inet->mc_loop;
1240 break; 1240 break;
1241 case IP_UNICAST_IF: 1241 case IP_UNICAST_IF:
1242 val = (__force int)htonl((__u32) inet->uc_index); 1242 val = (__force int)htonl((__u32) inet->uc_index);
1243 break; 1243 break;
1244 case IP_MULTICAST_IF: 1244 case IP_MULTICAST_IF:
1245 { 1245 {
1246 struct in_addr addr; 1246 struct in_addr addr;
1247 len = min_t(unsigned int, len, sizeof(struct in_addr)); 1247 len = min_t(unsigned int, len, sizeof(struct in_addr));
1248 addr.s_addr = inet->mc_addr; 1248 addr.s_addr = inet->mc_addr;
1249 release_sock(sk); 1249 release_sock(sk);
1250 1250
1251 if (put_user(len, optlen)) 1251 if (put_user(len, optlen))
1252 return -EFAULT; 1252 return -EFAULT;
1253 if (copy_to_user(optval, &addr, len)) 1253 if (copy_to_user(optval, &addr, len))
1254 return -EFAULT; 1254 return -EFAULT;
1255 return 0; 1255 return 0;
1256 } 1256 }
1257 case IP_MSFILTER: 1257 case IP_MSFILTER:
1258 { 1258 {
1259 struct ip_msfilter msf; 1259 struct ip_msfilter msf;
1260 int err; 1260 int err;
1261 1261
1262 if (len < IP_MSFILTER_SIZE(0)) { 1262 if (len < IP_MSFILTER_SIZE(0)) {
1263 release_sock(sk); 1263 release_sock(sk);
1264 return -EINVAL; 1264 return -EINVAL;
1265 } 1265 }
1266 if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) { 1266 if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
1267 release_sock(sk); 1267 release_sock(sk);
1268 return -EFAULT; 1268 return -EFAULT;
1269 } 1269 }
1270 err = ip_mc_msfget(sk, &msf, 1270 err = ip_mc_msfget(sk, &msf,
1271 (struct ip_msfilter __user *)optval, optlen); 1271 (struct ip_msfilter __user *)optval, optlen);
1272 release_sock(sk); 1272 release_sock(sk);
1273 return err; 1273 return err;
1274 } 1274 }
1275 case MCAST_MSFILTER: 1275 case MCAST_MSFILTER:
1276 { 1276 {
1277 struct group_filter gsf; 1277 struct group_filter gsf;
1278 int err; 1278 int err;
1279 1279
1280 if (len < GROUP_FILTER_SIZE(0)) { 1280 if (len < GROUP_FILTER_SIZE(0)) {
1281 release_sock(sk); 1281 release_sock(sk);
1282 return -EINVAL; 1282 return -EINVAL;
1283 } 1283 }
1284 if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) { 1284 if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
1285 release_sock(sk); 1285 release_sock(sk);
1286 return -EFAULT; 1286 return -EFAULT;
1287 } 1287 }
1288 err = ip_mc_gsfget(sk, &gsf, 1288 err = ip_mc_gsfget(sk, &gsf,
1289 (struct group_filter __user *)optval, 1289 (struct group_filter __user *)optval,
1290 optlen); 1290 optlen);
1291 release_sock(sk); 1291 release_sock(sk);
1292 return err; 1292 return err;
1293 } 1293 }
1294 case IP_MULTICAST_ALL: 1294 case IP_MULTICAST_ALL:
1295 val = inet->mc_all; 1295 val = inet->mc_all;
1296 break; 1296 break;
1297 case IP_PKTOPTIONS: 1297 case IP_PKTOPTIONS:
1298 { 1298 {
1299 struct msghdr msg; 1299 struct msghdr msg;
1300 1300
1301 release_sock(sk); 1301 release_sock(sk);
1302 1302
1303 if (sk->sk_type != SOCK_STREAM) 1303 if (sk->sk_type != SOCK_STREAM)
1304 return -ENOPROTOOPT; 1304 return -ENOPROTOOPT;
1305 1305
1306 msg.msg_control = optval; 1306 msg.msg_control = optval;
1307 msg.msg_controllen = len; 1307 msg.msg_controllen = len;
1308 msg.msg_flags = flags; 1308 msg.msg_flags = flags;
1309 1309
1310 if (inet->cmsg_flags & IP_CMSG_PKTINFO) { 1310 if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
1311 struct in_pktinfo info; 1311 struct in_pktinfo info;
1312 1312
1313 info.ipi_addr.s_addr = inet->inet_rcv_saddr; 1313 info.ipi_addr.s_addr = inet->inet_rcv_saddr;
1314 info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr; 1314 info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr;
1315 info.ipi_ifindex = inet->mc_index; 1315 info.ipi_ifindex = inet->mc_index;
1316 put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); 1316 put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
1317 } 1317 }
1318 if (inet->cmsg_flags & IP_CMSG_TTL) { 1318 if (inet->cmsg_flags & IP_CMSG_TTL) {
1319 int hlim = inet->mc_ttl; 1319 int hlim = inet->mc_ttl;
1320 put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim); 1320 put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
1321 } 1321 }
1322 if (inet->cmsg_flags & IP_CMSG_TOS) { 1322 if (inet->cmsg_flags & IP_CMSG_TOS) {
1323 int tos = inet->rcv_tos; 1323 int tos = inet->rcv_tos;
1324 put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos); 1324 put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
1325 } 1325 }
1326 len -= msg.msg_controllen; 1326 len -= msg.msg_controllen;
1327 return put_user(len, optlen); 1327 return put_user(len, optlen);
1328 } 1328 }
1329 case IP_FREEBIND: 1329 case IP_FREEBIND:
1330 val = inet->freebind; 1330 val = inet->freebind;
1331 break; 1331 break;
1332 case IP_TRANSPARENT: 1332 case IP_TRANSPARENT:
1333 val = inet->transparent; 1333 val = inet->transparent;
1334 break; 1334 break;
1335 case IP_MINTTL: 1335 case IP_MINTTL:
1336 val = inet->min_ttl; 1336 val = inet->min_ttl;
1337 break; 1337 break;
1338 default: 1338 default:
1339 release_sock(sk); 1339 release_sock(sk);
1340 return -ENOPROTOOPT; 1340 return -ENOPROTOOPT;
1341 } 1341 }
1342 release_sock(sk); 1342 release_sock(sk);
1343 1343
1344 if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { 1344 if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
1345 unsigned char ucval = (unsigned char)val; 1345 unsigned char ucval = (unsigned char)val;
1346 len = 1; 1346 len = 1;
1347 if (put_user(len, optlen)) 1347 if (put_user(len, optlen))
1348 return -EFAULT; 1348 return -EFAULT;
1349 if (copy_to_user(optval, &ucval, 1)) 1349 if (copy_to_user(optval, &ucval, 1))
1350 return -EFAULT; 1350 return -EFAULT;
1351 } else { 1351 } else {
1352 len = min_t(unsigned int, sizeof(int), len); 1352 len = min_t(unsigned int, sizeof(int), len);
1353 if (put_user(len, optlen)) 1353 if (put_user(len, optlen))
1354 return -EFAULT; 1354 return -EFAULT;
1355 if (copy_to_user(optval, &val, len)) 1355 if (copy_to_user(optval, &val, len))
1356 return -EFAULT; 1356 return -EFAULT;
1357 } 1357 }
1358 return 0; 1358 return 0;
1359 } 1359 }
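
A small sketch of the copy-out behaviour in the tail above: if the caller's optlen is shorter than sizeof(int) and the value fits in 0..255, only a single byte is returned, otherwise a full int is. Querying IP_TTL both ways on a fresh UDP socket (which reports the system default TTL) shows the difference; the program is illustrative only.

#include <stdio.h>
#include <netinet/in.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	unsigned char ttl_byte;
	int ttl_int;
	socklen_t len;

	len = sizeof(ttl_byte);		/* len < sizeof(int): one-byte path */
	getsockopt(fd, IPPROTO_IP, IP_TTL, &ttl_byte, &len);
	printf("IP_TTL as byte: %u (len %u)\n", ttl_byte, (unsigned int)len);

	len = sizeof(ttl_int);		/* full-int path */
	getsockopt(fd, IPPROTO_IP, IP_TTL, &ttl_int, &len);
	printf("IP_TTL as int:  %d (len %u)\n", ttl_int, (unsigned int)len);

	return 0;
}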
1360 1360
1361 int ip_getsockopt(struct sock *sk, int level, 1361 int ip_getsockopt(struct sock *sk, int level,
1362 int optname, char __user *optval, int __user *optlen) 1362 int optname, char __user *optval, int __user *optlen)
1363 { 1363 {
1364 int err; 1364 int err;
1365 1365
1366 err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0); 1366 err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
1367 #ifdef CONFIG_NETFILTER 1367 #ifdef CONFIG_NETFILTER
1368 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1368 /* we need to exclude all possible ENOPROTOOPTs except default case */
1369 if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && 1369 if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
1370 !ip_mroute_opt(optname)) { 1370 !ip_mroute_opt(optname)) {
1371 int len; 1371 int len;
1372 1372
1373 if (get_user(len, optlen)) 1373 if (get_user(len, optlen))
1374 return -EFAULT; 1374 return -EFAULT;
1375 1375
1376 lock_sock(sk); 1376 lock_sock(sk);
1377 err = nf_getsockopt(sk, PF_INET, optname, optval, 1377 err = nf_getsockopt(sk, PF_INET, optname, optval,
1378 &len); 1378 &len);
1379 release_sock(sk); 1379 release_sock(sk);
1380 if (err >= 0) 1380 if (err >= 0)
1381 err = put_user(len, optlen); 1381 err = put_user(len, optlen);
1382 return err; 1382 return err;
1383 } 1383 }
1384 #endif 1384 #endif
1385 return err; 1385 return err;
1386 } 1386 }
1387 EXPORT_SYMBOL(ip_getsockopt); 1387 EXPORT_SYMBOL(ip_getsockopt);
1388 1388
1389 #ifdef CONFIG_COMPAT 1389 #ifdef CONFIG_COMPAT
1390 int compat_ip_getsockopt(struct sock *sk, int level, int optname, 1390 int compat_ip_getsockopt(struct sock *sk, int level, int optname,
1391 char __user *optval, int __user *optlen) 1391 char __user *optval, int __user *optlen)
1392 { 1392 {
1393 int err; 1393 int err;
1394 1394
1395 if (optname == MCAST_MSFILTER) 1395 if (optname == MCAST_MSFILTER)
1396 return compat_mc_getsockopt(sk, level, optname, optval, optlen, 1396 return compat_mc_getsockopt(sk, level, optname, optval, optlen,
1397 ip_getsockopt); 1397 ip_getsockopt);
1398 1398
1399 err = do_ip_getsockopt(sk, level, optname, optval, optlen, 1399 err = do_ip_getsockopt(sk, level, optname, optval, optlen,
1400 MSG_CMSG_COMPAT); 1400 MSG_CMSG_COMPAT);
1401 1401
1402 #ifdef CONFIG_NETFILTER 1402 #ifdef CONFIG_NETFILTER
1403 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1403 /* we need to exclude all possible ENOPROTOOPTs except default case */
1404 if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && 1404 if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
1405 !ip_mroute_opt(optname)) { 1405 !ip_mroute_opt(optname)) {
1406 int len; 1406 int len;
1407 1407
1408 if (get_user(len, optlen)) 1408 if (get_user(len, optlen))
1409 return -EFAULT; 1409 return -EFAULT;
1410 1410
1411 lock_sock(sk); 1411 lock_sock(sk);
1412 err = compat_nf_getsockopt(sk, PF_INET, optname, optval, &len); 1412 err = compat_nf_getsockopt(sk, PF_INET, optname, optval, &len);
1413 release_sock(sk); 1413 release_sock(sk);
1414 if (err >= 0) 1414 if (err >= 0)
1415 err = put_user(len, optlen); 1415 err = put_user(len, optlen);
1416 return err; 1416 return err;
1417 } 1417 }
1418 #endif 1418 #endif
1419 return err; 1419 return err;
1420 } 1420 }
1421 EXPORT_SYMBOL(compat_ip_getsockopt); 1421 EXPORT_SYMBOL(compat_ip_getsockopt);
1422 #endif 1422 #endif
1423 1423
net/ipv4/tcp_output.c
1 /* 1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX 2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket 3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level. 4 * interface as the means of communication with the user level.
5 * 5 *
6 * Implementation of the Transmission Control Protocol(TCP). 6 * Implementation of the Transmission Control Protocol(TCP).
7 * 7 *
8 * Authors: Ross Biro 8 * Authors: Ross Biro
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Mark Evans, <evansmp@uhura.aston.ac.uk> 10 * Mark Evans, <evansmp@uhura.aston.ac.uk>
11 * Corey Minyard <wf-rch!minyard@relay.EU.net> 11 * Corey Minyard <wf-rch!minyard@relay.EU.net>
12 * Florian La Roche, <flla@stud.uni-sb.de> 12 * Florian La Roche, <flla@stud.uni-sb.de>
13 * Charles Hedrick, <hedrick@klinzhai.rutgers.edu> 13 * Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
14 * Linus Torvalds, <torvalds@cs.helsinki.fi> 14 * Linus Torvalds, <torvalds@cs.helsinki.fi>
15 * Alan Cox, <gw4pts@gw4pts.ampr.org> 15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Matthew Dillon, <dillon@apollo.west.oic.com> 16 * Matthew Dillon, <dillon@apollo.west.oic.com>
17 * Arnt Gulbrandsen, <agulbra@nvg.unit.no> 17 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
18 * Jorge Cwik, <jorge@laser.satlink.net> 18 * Jorge Cwik, <jorge@laser.satlink.net>
19 */ 19 */
20 20
21 /* 21 /*
22 * Changes: Pedro Roque : Retransmit queue handled by TCP. 22 * Changes: Pedro Roque : Retransmit queue handled by TCP.
23 * : Fragmentation on mtu decrease 23 * : Fragmentation on mtu decrease
24 * : Segment collapse on retransmit 24 * : Segment collapse on retransmit
25 * : AF independence 25 * : AF independence
26 * 26 *
27 * Linus Torvalds : send_delayed_ack 27 * Linus Torvalds : send_delayed_ack
28 * David S. Miller : Charge memory using the right skb 28 * David S. Miller : Charge memory using the right skb
29 * during syn/ack processing. 29 * during syn/ack processing.
30 * David S. Miller : Output engine completely rewritten. 30 * David S. Miller : Output engine completely rewritten.
31 * Andrea Arcangeli: SYNACK carry ts_recent in tsecr. 31 * Andrea Arcangeli: SYNACK carry ts_recent in tsecr.
32 * Cacophonix Gaul : draft-minshall-nagle-01 32 * Cacophonix Gaul : draft-minshall-nagle-01
33 * J Hadi Salim : ECN support 33 * J Hadi Salim : ECN support
34 * 34 *
35 */ 35 */
36 36
37 #define pr_fmt(fmt) "TCP: " fmt 37 #define pr_fmt(fmt) "TCP: " fmt
38 38
39 #include <net/tcp.h> 39 #include <net/tcp.h>
40 40
41 #include <linux/compiler.h> 41 #include <linux/compiler.h>
42 #include <linux/gfp.h> 42 #include <linux/gfp.h>
43 #include <linux/module.h> 43 #include <linux/module.h>
44 44
45 /* People can turn this off for buggy TCP's found in printers etc. */ 45 /* People can turn this off for buggy TCP's found in printers etc. */
46 int sysctl_tcp_retrans_collapse __read_mostly = 1; 46 int sysctl_tcp_retrans_collapse __read_mostly = 1;
47 47
48 /* People can turn this on to work with those rare, broken TCPs that 48 /* People can turn this on to work with those rare, broken TCPs that
49 * interpret the window field as a signed quantity. 49 * interpret the window field as a signed quantity.
50 */ 50 */
51 int sysctl_tcp_workaround_signed_windows __read_mostly = 0; 51 int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
52 52
53 /* Default TSQ limit of two TSO segments */ 53 /* Default TSQ limit of two TSO segments */
54 int sysctl_tcp_limit_output_bytes __read_mostly = 131072; 54 int sysctl_tcp_limit_output_bytes __read_mostly = 131072;
55 55
56 /* This limits the percentage of the congestion window which we 56 /* This limits the percentage of the congestion window which we
57 * will allow a single TSO frame to consume. Building TSO frames 57 * will allow a single TSO frame to consume. Building TSO frames
58 * which are too large can cause TCP streams to be bursty. 58 * which are too large can cause TCP streams to be bursty.
59 */ 59 */
60 int sysctl_tcp_tso_win_divisor __read_mostly = 3; 60 int sysctl_tcp_tso_win_divisor __read_mostly = 3;
61 61
62 int sysctl_tcp_mtu_probing __read_mostly = 0; 62 int sysctl_tcp_mtu_probing __read_mostly = 0;
63 int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS; 63 int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
64 64
65 /* By default, RFC2861 behavior. */ 65 /* By default, RFC2861 behavior. */
66 int sysctl_tcp_slow_start_after_idle __read_mostly = 1; 66 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
67 67
68 unsigned int sysctl_tcp_notsent_lowat __read_mostly = UINT_MAX; 68 unsigned int sysctl_tcp_notsent_lowat __read_mostly = UINT_MAX;
69 EXPORT_SYMBOL(sysctl_tcp_notsent_lowat); 69 EXPORT_SYMBOL(sysctl_tcp_notsent_lowat);
70 70
71 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, 71 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
72 int push_one, gfp_t gfp); 72 int push_one, gfp_t gfp);
73 73
74 /* Account for new data that has been sent to the network. */ 74 /* Account for new data that has been sent to the network. */
75 static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) 75 static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
76 { 76 {
77 struct inet_connection_sock *icsk = inet_csk(sk); 77 struct inet_connection_sock *icsk = inet_csk(sk);
78 struct tcp_sock *tp = tcp_sk(sk); 78 struct tcp_sock *tp = tcp_sk(sk);
79 unsigned int prior_packets = tp->packets_out; 79 unsigned int prior_packets = tp->packets_out;
80 80
81 tcp_advance_send_head(sk, skb); 81 tcp_advance_send_head(sk, skb);
82 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; 82 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
83 83
84 tp->packets_out += tcp_skb_pcount(skb); 84 tp->packets_out += tcp_skb_pcount(skb);
85 if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || 85 if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
86 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { 86 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
87 tcp_rearm_rto(sk); 87 tcp_rearm_rto(sk);
88 } 88 }
89 } 89 }
90 90
91 /* SND.NXT, if window was not shrunk. 91 /* SND.NXT, if window was not shrunk.
92 * If window has been shrunk, what should we make? It is not clear at all. 92 * If window has been shrunk, what should we make? It is not clear at all.
93 * Using SND.UNA we will fail to open window, SND.NXT is out of window. :-( 93 * Using SND.UNA we will fail to open window, SND.NXT is out of window. :-(
94 * Anything in between SND.UNA...SND.UNA+SND.WND also can be already 94 * Anything in between SND.UNA...SND.UNA+SND.WND also can be already
95 * invalid. OK, let's make this for now: 95 * invalid. OK, let's make this for now:
96 */ 96 */
97 static inline __u32 tcp_acceptable_seq(const struct sock *sk) 97 static inline __u32 tcp_acceptable_seq(const struct sock *sk)
98 { 98 {
99 const struct tcp_sock *tp = tcp_sk(sk); 99 const struct tcp_sock *tp = tcp_sk(sk);
100 100
101 if (!before(tcp_wnd_end(tp), tp->snd_nxt)) 101 if (!before(tcp_wnd_end(tp), tp->snd_nxt))
102 return tp->snd_nxt; 102 return tp->snd_nxt;
103 else 103 else
104 return tcp_wnd_end(tp); 104 return tcp_wnd_end(tp);
105 } 105 }
106 106
107 /* Calculate mss to advertise in SYN segment. 107 /* Calculate mss to advertise in SYN segment.
108 * RFC1122, RFC1063, draft-ietf-tcpimpl-pmtud-01 state that: 108 * RFC1122, RFC1063, draft-ietf-tcpimpl-pmtud-01 state that:
109 * 109 *
110 * 1. It is independent of path mtu. 110 * 1. It is independent of path mtu.
111 * 2. Ideally, it is maximal possible segment size i.e. 65535-40. 111 * 2. Ideally, it is maximal possible segment size i.e. 65535-40.
112 * 3. For IPv4 it is reasonable to calculate it from maximal MTU of 112 * 3. For IPv4 it is reasonable to calculate it from maximal MTU of
113 * attached devices, because some buggy hosts are confused by 113 * attached devices, because some buggy hosts are confused by
114 * large MSS. 114 * large MSS.
115 * 4. We do not make 3, we advertise MSS, calculated from first 115 * 4. We do not make 3, we advertise MSS, calculated from first
116 * hop device mtu, but allow to raise it to ip_rt_min_advmss. 116 * hop device mtu, but allow to raise it to ip_rt_min_advmss.
117 * This may be overridden via information stored in routing table. 117 * This may be overridden via information stored in routing table.
118 * 5. Value 65535 for MSS is valid in IPv6 and means "as large as possible, 118 * 5. Value 65535 for MSS is valid in IPv6 and means "as large as possible,
119 * probably even Jumbo". 119 * probably even Jumbo".
120 */ 120 */
121 static __u16 tcp_advertise_mss(struct sock *sk) 121 static __u16 tcp_advertise_mss(struct sock *sk)
122 { 122 {
123 struct tcp_sock *tp = tcp_sk(sk); 123 struct tcp_sock *tp = tcp_sk(sk);
124 const struct dst_entry *dst = __sk_dst_get(sk); 124 const struct dst_entry *dst = __sk_dst_get(sk);
125 int mss = tp->advmss; 125 int mss = tp->advmss;
126 126
127 if (dst) { 127 if (dst) {
128 unsigned int metric = dst_metric_advmss(dst); 128 unsigned int metric = dst_metric_advmss(dst);
129 129
130 if (metric < mss) { 130 if (metric < mss) {
131 mss = metric; 131 mss = metric;
132 tp->advmss = mss; 132 tp->advmss = mss;
133 } 133 }
134 } 134 }
135 135
136 return (__u16)mss; 136 return (__u16)mss;
137 } 137 }
138 138
139 /* RFC2861. Reset CWND after idle period longer than RTO to "restart window". 139 /* RFC2861. Reset CWND after idle period longer than RTO to "restart window".
140 * This is the first part of cwnd validation mechanism. */ 140 * This is the first part of cwnd validation mechanism. */
141 static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst) 141 static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst)
142 { 142 {
143 struct tcp_sock *tp = tcp_sk(sk); 143 struct tcp_sock *tp = tcp_sk(sk);
144 s32 delta = tcp_time_stamp - tp->lsndtime; 144 s32 delta = tcp_time_stamp - tp->lsndtime;
145 u32 restart_cwnd = tcp_init_cwnd(tp, dst); 145 u32 restart_cwnd = tcp_init_cwnd(tp, dst);
146 u32 cwnd = tp->snd_cwnd; 146 u32 cwnd = tp->snd_cwnd;
147 147
148 tcp_ca_event(sk, CA_EVENT_CWND_RESTART); 148 tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
149 149
150 tp->snd_ssthresh = tcp_current_ssthresh(sk); 150 tp->snd_ssthresh = tcp_current_ssthresh(sk);
151 restart_cwnd = min(restart_cwnd, cwnd); 151 restart_cwnd = min(restart_cwnd, cwnd);
152 152
153 while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) 153 while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
154 cwnd >>= 1; 154 cwnd >>= 1;
155 tp->snd_cwnd = max(cwnd, restart_cwnd); 155 tp->snd_cwnd = max(cwnd, restart_cwnd);
156 tp->snd_cwnd_stamp = tcp_time_stamp; 156 tp->snd_cwnd_stamp = tcp_time_stamp;
157 tp->snd_cwnd_used = 0; 157 tp->snd_cwnd_used = 0;
158 } 158 }
159 159
160 /* Congestion state accounting after a packet has been sent. */ 160 /* Congestion state accounting after a packet has been sent. */
161 static void tcp_event_data_sent(struct tcp_sock *tp, 161 static void tcp_event_data_sent(struct tcp_sock *tp,
162 struct sock *sk) 162 struct sock *sk)
163 { 163 {
164 struct inet_connection_sock *icsk = inet_csk(sk); 164 struct inet_connection_sock *icsk = inet_csk(sk);
165 const u32 now = tcp_time_stamp; 165 const u32 now = tcp_time_stamp;
166 const struct dst_entry *dst = __sk_dst_get(sk); 166 const struct dst_entry *dst = __sk_dst_get(sk);
167 167
168 if (sysctl_tcp_slow_start_after_idle && 168 if (sysctl_tcp_slow_start_after_idle &&
169 (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto)) 169 (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
170 tcp_cwnd_restart(sk, __sk_dst_get(sk)); 170 tcp_cwnd_restart(sk, __sk_dst_get(sk));
171 171
172 tp->lsndtime = now; 172 tp->lsndtime = now;
173 173
174 /* If it is a reply for ato after last received 174 /* If it is a reply for ato after last received
175 * packet, enter pingpong mode. 175 * packet, enter pingpong mode.
176 */ 176 */
177 if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato && 177 if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato &&
178 (!dst || !dst_metric(dst, RTAX_QUICKACK))) 178 (!dst || !dst_metric(dst, RTAX_QUICKACK)))
179 icsk->icsk_ack.pingpong = 1; 179 icsk->icsk_ack.pingpong = 1;
180 } 180 }
181 181
182 /* Account for an ACK we sent. */ 182 /* Account for an ACK we sent. */
183 static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) 183 static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
184 { 184 {
185 tcp_dec_quickack_mode(sk, pkts); 185 tcp_dec_quickack_mode(sk, pkts);
186 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); 186 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
187 } 187 }
188 188
189 189
190 u32 tcp_default_init_rwnd(u32 mss) 190 u32 tcp_default_init_rwnd(u32 mss)
191 { 191 {
192 /* Initial receive window should be twice of TCP_INIT_CWND to 192 /* Initial receive window should be twice of TCP_INIT_CWND to
193 * enable proper sending of new unsent data during fast recovery 193 * enable proper sending of new unsent data during fast recovery
194 * (RFC 3517, Section 4, NextSeg() rule (2)). Further place a 194 * (RFC 3517, Section 4, NextSeg() rule (2)). Further place a
195 * limit when mss is larger than 1460. 195 * limit when mss is larger than 1460.
196 */ 196 */
197 u32 init_rwnd = TCP_INIT_CWND * 2; 197 u32 init_rwnd = TCP_INIT_CWND * 2;
198 198
199 if (mss > 1460) 199 if (mss > 1460)
200 init_rwnd = max((1460 * init_rwnd) / mss, 2U); 200 init_rwnd = max((1460 * init_rwnd) / mss, 2U);
201 return init_rwnd; 201 return init_rwnd;
202 } 202 }
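
A short worked example of the clamp above, assuming TCP_INIT_CWND is 10 (its value in kernels of this vintage), so the nominal initial receive window is 20 segments:

/*
 *   mss = 1460  ->  init_rwnd = 20 segments             (29200 bytes)
 *   mss = 8960  ->  init_rwnd = (1460 * 20) / 8960 = 3  (26880 bytes)
 *   mss = 65000 ->  the division yields 0, so max() floors it at 2
 */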
203 203
204 /* Determine a window scaling and initial window to offer. 204 /* Determine a window scaling and initial window to offer.
205 * Based on the assumption that the given amount of space 205 * Based on the assumption that the given amount of space
206 * will be offered. Store the results in the tp structure. 206 * will be offered. Store the results in the tp structure.
207 * NOTE: for smooth operation initial space offering should 207 * NOTE: for smooth operation initial space offering should
208 * be a multiple of mss if possible. We assume here that mss >= 1. 208 * be a multiple of mss if possible. We assume here that mss >= 1.
209 * This MUST be enforced by all callers. 209 * This MUST be enforced by all callers.
210 */ 210 */
211 void tcp_select_initial_window(int __space, __u32 mss, 211 void tcp_select_initial_window(int __space, __u32 mss,
212 __u32 *rcv_wnd, __u32 *window_clamp, 212 __u32 *rcv_wnd, __u32 *window_clamp,
213 int wscale_ok, __u8 *rcv_wscale, 213 int wscale_ok, __u8 *rcv_wscale,
214 __u32 init_rcv_wnd) 214 __u32 init_rcv_wnd)
215 { 215 {
216 unsigned int space = (__space < 0 ? 0 : __space); 216 unsigned int space = (__space < 0 ? 0 : __space);
217 217
218 /* If no clamp set the clamp to the max possible scaled window */ 218 /* If no clamp set the clamp to the max possible scaled window */
219 if (*window_clamp == 0) 219 if (*window_clamp == 0)
220 (*window_clamp) = (65535 << 14); 220 (*window_clamp) = (65535 << 14);
221 space = min(*window_clamp, space); 221 space = min(*window_clamp, space);
222 222
223 /* Quantize space offering to a multiple of mss if possible. */ 223 /* Quantize space offering to a multiple of mss if possible. */
224 if (space > mss) 224 if (space > mss)
225 space = (space / mss) * mss; 225 space = (space / mss) * mss;
226 226
227 /* NOTE: offering an initial window larger than 32767 227 /* NOTE: offering an initial window larger than 32767
228 * will break some buggy TCP stacks. If the admin tells us 228 * will break some buggy TCP stacks. If the admin tells us
229 * it is likely we could be speaking with such a buggy stack 229 * it is likely we could be speaking with such a buggy stack
230 * we will truncate our initial window offering to 32K-1 230 * we will truncate our initial window offering to 32K-1
231 * unless the remote has sent us a window scaling option, 231 * unless the remote has sent us a window scaling option,
232 * which we interpret as a sign the remote TCP is not 232 * which we interpret as a sign the remote TCP is not
233 * misinterpreting the window field as a signed quantity. 233 * misinterpreting the window field as a signed quantity.
234 */ 234 */
235 if (sysctl_tcp_workaround_signed_windows) 235 if (sysctl_tcp_workaround_signed_windows)
236 (*rcv_wnd) = min(space, MAX_TCP_WINDOW); 236 (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
237 else 237 else
238 (*rcv_wnd) = space; 238 (*rcv_wnd) = space;
239 239
240 (*rcv_wscale) = 0; 240 (*rcv_wscale) = 0;
241 if (wscale_ok) { 241 if (wscale_ok) {
242 /* Set window scaling on max possible window 242 /* Set window scaling on max possible window
243 * See RFC1323 for an explanation of the limit to 14 243 * See RFC1323 for an explanation of the limit to 14
244 */ 244 */
245 space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max); 245 space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
246 space = min_t(u32, space, *window_clamp); 246 space = min_t(u32, space, *window_clamp);
247 while (space > 65535 && (*rcv_wscale) < 14) { 247 while (space > 65535 && (*rcv_wscale) < 14) {
248 space >>= 1; 248 space >>= 1;
249 (*rcv_wscale)++; 249 (*rcv_wscale)++;
250 } 250 }
251 } 251 }
252 252
253 if (mss > (1 << *rcv_wscale)) { 253 if (mss > (1 << *rcv_wscale)) {
254 if (!init_rcv_wnd) /* Use default unless specified otherwise */ 254 if (!init_rcv_wnd) /* Use default unless specified otherwise */
255 init_rcv_wnd = tcp_default_init_rwnd(mss); 255 init_rcv_wnd = tcp_default_init_rwnd(mss);
256 *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss); 256 *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
257 } 257 }
258 258
259 /* Set the clamp no higher than max representable value */ 259 /* Set the clamp no higher than max representable value */
260 (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp); 260 (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
261 } 261 }
262 EXPORT_SYMBOL(tcp_select_initial_window); 262 EXPORT_SYMBOL(tcp_select_initial_window);
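
The scale-selection loop can be read in isolation: shift the available buffer space right until it fits the 16-bit window field, capping the shift at 14 per RFC 1323. A stand-alone sketch (the helper name pick_rcv_wscale is invented here):

#include <stdio.h>

static unsigned int pick_rcv_wscale(unsigned int space)
{
	unsigned int wscale = 0;

	while (space > 65535 && wscale < 14) {
		space >>= 1;
		wscale++;
	}
	return wscale;
}

int main(void)
{
	/* 6 MiB of receive buffer needs a shift of 7: 6291456 >> 7 == 49152 */
	printf("wscale = %u\n", pick_rcv_wscale(6 * 1024 * 1024));
	return 0;
}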
263 263
264 /* Choose a new window to advertise, update state in tcp_sock for the 264 /* Choose a new window to advertise, update state in tcp_sock for the
265 * socket, and return result with RFC1323 scaling applied. The return 265 * socket, and return result with RFC1323 scaling applied. The return
266 * value can be stuffed directly into th->window for an outgoing 266 * value can be stuffed directly into th->window for an outgoing
267 * frame. 267 * frame.
268 */ 268 */
269 static u16 tcp_select_window(struct sock *sk) 269 static u16 tcp_select_window(struct sock *sk)
270 { 270 {
271 struct tcp_sock *tp = tcp_sk(sk); 271 struct tcp_sock *tp = tcp_sk(sk);
272 u32 cur_win = tcp_receive_window(tp); 272 u32 cur_win = tcp_receive_window(tp);
273 u32 new_win = __tcp_select_window(sk); 273 u32 new_win = __tcp_select_window(sk);
274 274
275 /* Never shrink the offered window */ 275 /* Never shrink the offered window */
276 if (new_win < cur_win) { 276 if (new_win < cur_win) {
277 /* Danger Will Robinson! 277 /* Danger Will Robinson!
278 * Don't update rcv_wup/rcv_wnd here or else 278 * Don't update rcv_wup/rcv_wnd here or else
279 * we will not be able to advertise a zero 279 * we will not be able to advertise a zero
280 * window in time. --DaveM 280 * window in time. --DaveM
281 * 281 *
282 * Relax Will Robinson. 282 * Relax Will Robinson.
283 */ 283 */
284 new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale); 284 new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
285 } 285 }
286 tp->rcv_wnd = new_win; 286 tp->rcv_wnd = new_win;
287 tp->rcv_wup = tp->rcv_nxt; 287 tp->rcv_wup = tp->rcv_nxt;
288 288
289 /* Make sure we do not exceed the maximum possible 289 /* Make sure we do not exceed the maximum possible
290 * scaled window. 290 * scaled window.
291 */ 291 */
292 if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows) 292 if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
293 new_win = min(new_win, MAX_TCP_WINDOW); 293 new_win = min(new_win, MAX_TCP_WINDOW);
294 else 294 else
295 new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); 295 new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
296 296
297 /* RFC1323 scaling applied */ 297 /* RFC1323 scaling applied */
298 new_win >>= tp->rx_opt.rcv_wscale; 298 new_win >>= tp->rx_opt.rcv_wscale;
299 299
300 /* If we advertise zero window, disable fast path. */ 300 /* If we advertise zero window, disable fast path. */
301 if (new_win == 0) 301 if (new_win == 0)
302 tp->pred_flags = 0; 302 tp->pred_flags = 0;
303 303
304 return new_win; 304 return new_win;
305 } 305 }
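
A worked example of the no-shrink clamp above, assuming rcv_wscale is 7 (window granularity 128 bytes):

/*
 *   cur_win = 10000:  advertising 10000 >> 7 == 78 would let the peer see
 *   78 * 128 == 9984 bytes, i.e. a shrunken window.  ALIGN(10000, 128)
 *   == 10112 rounds up to 79 units, so the offered window stays at or
 *   above cur_win.
 */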
306 306
307 /* Packet ECN state for a SYN-ACK */ 307 /* Packet ECN state for a SYN-ACK */
308 static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb) 308 static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb)
309 { 309 {
310 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; 310 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
311 if (!(tp->ecn_flags & TCP_ECN_OK)) 311 if (!(tp->ecn_flags & TCP_ECN_OK))
312 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; 312 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
313 } 313 }
314 314
315 /* Packet ECN state for a SYN. */ 315 /* Packet ECN state for a SYN. */
316 static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) 316 static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
317 { 317 {
318 struct tcp_sock *tp = tcp_sk(sk); 318 struct tcp_sock *tp = tcp_sk(sk);
319 319
320 tp->ecn_flags = 0; 320 tp->ecn_flags = 0;
321 if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1) { 321 if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1) {
322 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; 322 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
323 tp->ecn_flags = TCP_ECN_OK; 323 tp->ecn_flags = TCP_ECN_OK;
324 } 324 }
325 } 325 }
326 326
327 static __inline__ void 327 static __inline__ void
328 TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th) 328 TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th)
329 { 329 {
330 if (inet_rsk(req)->ecn_ok) 330 if (inet_rsk(req)->ecn_ok)
331 th->ece = 1; 331 th->ece = 1;
332 } 332 }
333 333
334 /* Set up ECN state for a packet on a ESTABLISHED socket that is about to 334 /* Set up ECN state for a packet on a ESTABLISHED socket that is about to
335 * be sent. 335 * be sent.
336 */ 336 */
337 static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, 337 static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
338 int tcp_header_len) 338 int tcp_header_len)
339 { 339 {
340 struct tcp_sock *tp = tcp_sk(sk); 340 struct tcp_sock *tp = tcp_sk(sk);
341 341
342 if (tp->ecn_flags & TCP_ECN_OK) { 342 if (tp->ecn_flags & TCP_ECN_OK) {
343 /* Not-retransmitted data segment: set ECT and inject CWR. */ 343 /* Not-retransmitted data segment: set ECT and inject CWR. */
344 if (skb->len != tcp_header_len && 344 if (skb->len != tcp_header_len &&
345 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) { 345 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
346 INET_ECN_xmit(sk); 346 INET_ECN_xmit(sk);
347 if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) { 347 if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
348 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; 348 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
349 tcp_hdr(skb)->cwr = 1; 349 tcp_hdr(skb)->cwr = 1;
350 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; 350 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
351 } 351 }
352 } else { 352 } else {
353 /* ACK or retransmitted segment: clear ECT|CE */ 353 /* ACK or retransmitted segment: clear ECT|CE */
354 INET_ECN_dontxmit(sk); 354 INET_ECN_dontxmit(sk);
355 } 355 }
356 if (tp->ecn_flags & TCP_ECN_DEMAND_CWR) 356 if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
357 tcp_hdr(skb)->ece = 1; 357 tcp_hdr(skb)->ece = 1;
358 } 358 }
359 } 359 }
360 360
361 /* Constructs common control bits of non-data skb. If SYN/FIN is present, 361 /* Constructs common control bits of non-data skb. If SYN/FIN is present,
362 * auto increment end seqno. 362 * auto increment end seqno.
363 */ 363 */
364 static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) 364 static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
365 { 365 {
366 struct skb_shared_info *shinfo = skb_shinfo(skb); 366 struct skb_shared_info *shinfo = skb_shinfo(skb);
367 367
368 skb->ip_summed = CHECKSUM_PARTIAL; 368 skb->ip_summed = CHECKSUM_PARTIAL;
369 skb->csum = 0; 369 skb->csum = 0;
370 370
371 TCP_SKB_CB(skb)->tcp_flags = flags; 371 TCP_SKB_CB(skb)->tcp_flags = flags;
372 TCP_SKB_CB(skb)->sacked = 0; 372 TCP_SKB_CB(skb)->sacked = 0;
373 373
374 shinfo->gso_segs = 1; 374 shinfo->gso_segs = 1;
375 shinfo->gso_size = 0; 375 shinfo->gso_size = 0;
376 shinfo->gso_type = 0; 376 shinfo->gso_type = 0;
377 377
378 TCP_SKB_CB(skb)->seq = seq; 378 TCP_SKB_CB(skb)->seq = seq;
379 if (flags & (TCPHDR_SYN | TCPHDR_FIN)) 379 if (flags & (TCPHDR_SYN | TCPHDR_FIN))
380 seq++; 380 seq++;
381 TCP_SKB_CB(skb)->end_seq = seq; 381 TCP_SKB_CB(skb)->end_seq = seq;
382 } 382 }
383 383
384 static inline bool tcp_urg_mode(const struct tcp_sock *tp) 384 static inline bool tcp_urg_mode(const struct tcp_sock *tp)
385 { 385 {
386 return tp->snd_una != tp->snd_up; 386 return tp->snd_una != tp->snd_up;
387 } 387 }
388 388
389 #define OPTION_SACK_ADVERTISE (1 << 0) 389 #define OPTION_SACK_ADVERTISE (1 << 0)
390 #define OPTION_TS (1 << 1) 390 #define OPTION_TS (1 << 1)
391 #define OPTION_MD5 (1 << 2) 391 #define OPTION_MD5 (1 << 2)
392 #define OPTION_WSCALE (1 << 3) 392 #define OPTION_WSCALE (1 << 3)
393 #define OPTION_FAST_OPEN_COOKIE (1 << 8) 393 #define OPTION_FAST_OPEN_COOKIE (1 << 8)
394 394
395 struct tcp_out_options { 395 struct tcp_out_options {
396 u16 options; /* bit field of OPTION_* */ 396 u16 options; /* bit field of OPTION_* */
397 u16 mss; /* 0 to disable */ 397 u16 mss; /* 0 to disable */
398 u8 ws; /* window scale, 0 to disable */ 398 u8 ws; /* window scale, 0 to disable */
399 u8 num_sack_blocks; /* number of SACK blocks to include */ 399 u8 num_sack_blocks; /* number of SACK blocks to include */
400 u8 hash_size; /* bytes in hash_location */ 400 u8 hash_size; /* bytes in hash_location */
401 __u8 *hash_location; /* temporary pointer, overloaded */ 401 __u8 *hash_location; /* temporary pointer, overloaded */
402 __u32 tsval, tsecr; /* need to include OPTION_TS */ 402 __u32 tsval, tsecr; /* need to include OPTION_TS */
403 struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ 403 struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
404 }; 404 };
405 405
406 /* Write previously computed TCP options to the packet. 406 /* Write previously computed TCP options to the packet.
407 * 407 *
408 * Beware: Something in the Internet is very sensitive to the ordering of 408 * Beware: Something in the Internet is very sensitive to the ordering of
409 * TCP options, we learned this through the hard way, so be careful here. 409 * TCP options, we learned this through the hard way, so be careful here.
410 * Luckily we can at least blame others for their non-compliance but from 410 * Luckily we can at least blame others for their non-compliance but from
411 * inter-operatibility perspective it seems that we're somewhat stuck with 411 * inter-operability perspective it seems that we're somewhat stuck with
412 * the ordering which we have been using if we want to keep working with 412 * the ordering which we have been using if we want to keep working with
413 * those broken things (not that it currently hurts anybody as there isn't 413 * those broken things (not that it currently hurts anybody as there isn't
414 * particular reason why the ordering would need to be changed). 414 * particular reason why the ordering would need to be changed).
415 * 415 *
416 * At least SACK_PERM as the first option is known to lead to a disaster 416 * At least SACK_PERM as the first option is known to lead to a disaster
417 * (but it may well be that other scenarios fail similarly). 417 * (but it may well be that other scenarios fail similarly).
418 */ 418 */
419 static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, 419 static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
420 struct tcp_out_options *opts) 420 struct tcp_out_options *opts)
421 { 421 {
422 u16 options = opts->options; /* mungable copy */ 422 u16 options = opts->options; /* mungable copy */
423 423
424 if (unlikely(OPTION_MD5 & options)) { 424 if (unlikely(OPTION_MD5 & options)) {
425 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 425 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
426 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); 426 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
427 /* overload cookie hash location */ 427 /* overload cookie hash location */
428 opts->hash_location = (__u8 *)ptr; 428 opts->hash_location = (__u8 *)ptr;
429 ptr += 4; 429 ptr += 4;
430 } 430 }
431 431
432 if (unlikely(opts->mss)) { 432 if (unlikely(opts->mss)) {
433 *ptr++ = htonl((TCPOPT_MSS << 24) | 433 *ptr++ = htonl((TCPOPT_MSS << 24) |
434 (TCPOLEN_MSS << 16) | 434 (TCPOLEN_MSS << 16) |
435 opts->mss); 435 opts->mss);
436 } 436 }
437 437
438 if (likely(OPTION_TS & options)) { 438 if (likely(OPTION_TS & options)) {
439 if (unlikely(OPTION_SACK_ADVERTISE & options)) { 439 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
440 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | 440 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
441 (TCPOLEN_SACK_PERM << 16) | 441 (TCPOLEN_SACK_PERM << 16) |
442 (TCPOPT_TIMESTAMP << 8) | 442 (TCPOPT_TIMESTAMP << 8) |
443 TCPOLEN_TIMESTAMP); 443 TCPOLEN_TIMESTAMP);
444 options &= ~OPTION_SACK_ADVERTISE; 444 options &= ~OPTION_SACK_ADVERTISE;
445 } else { 445 } else {
446 *ptr++ = htonl((TCPOPT_NOP << 24) | 446 *ptr++ = htonl((TCPOPT_NOP << 24) |
447 (TCPOPT_NOP << 16) | 447 (TCPOPT_NOP << 16) |
448 (TCPOPT_TIMESTAMP << 8) | 448 (TCPOPT_TIMESTAMP << 8) |
449 TCPOLEN_TIMESTAMP); 449 TCPOLEN_TIMESTAMP);
450 } 450 }
451 *ptr++ = htonl(opts->tsval); 451 *ptr++ = htonl(opts->tsval);
452 *ptr++ = htonl(opts->tsecr); 452 *ptr++ = htonl(opts->tsecr);
453 } 453 }
454 454
455 if (unlikely(OPTION_SACK_ADVERTISE & options)) { 455 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
456 *ptr++ = htonl((TCPOPT_NOP << 24) | 456 *ptr++ = htonl((TCPOPT_NOP << 24) |
457 (TCPOPT_NOP << 16) | 457 (TCPOPT_NOP << 16) |
458 (TCPOPT_SACK_PERM << 8) | 458 (TCPOPT_SACK_PERM << 8) |
459 TCPOLEN_SACK_PERM); 459 TCPOLEN_SACK_PERM);
460 } 460 }
461 461
462 if (unlikely(OPTION_WSCALE & options)) { 462 if (unlikely(OPTION_WSCALE & options)) {
463 *ptr++ = htonl((TCPOPT_NOP << 24) | 463 *ptr++ = htonl((TCPOPT_NOP << 24) |
464 (TCPOPT_WINDOW << 16) | 464 (TCPOPT_WINDOW << 16) |
465 (TCPOLEN_WINDOW << 8) | 465 (TCPOLEN_WINDOW << 8) |
466 opts->ws); 466 opts->ws);
467 } 467 }
468 468
469 if (unlikely(opts->num_sack_blocks)) { 469 if (unlikely(opts->num_sack_blocks)) {
470 struct tcp_sack_block *sp = tp->rx_opt.dsack ? 470 struct tcp_sack_block *sp = tp->rx_opt.dsack ?
471 tp->duplicate_sack : tp->selective_acks; 471 tp->duplicate_sack : tp->selective_acks;
472 int this_sack; 472 int this_sack;
473 473
474 *ptr++ = htonl((TCPOPT_NOP << 24) | 474 *ptr++ = htonl((TCPOPT_NOP << 24) |
475 (TCPOPT_NOP << 16) | 475 (TCPOPT_NOP << 16) |
476 (TCPOPT_SACK << 8) | 476 (TCPOPT_SACK << 8) |
477 (TCPOLEN_SACK_BASE + (opts->num_sack_blocks * 477 (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
478 TCPOLEN_SACK_PERBLOCK))); 478 TCPOLEN_SACK_PERBLOCK)));
479 479
480 for (this_sack = 0; this_sack < opts->num_sack_blocks; 480 for (this_sack = 0; this_sack < opts->num_sack_blocks;
481 ++this_sack) { 481 ++this_sack) {
482 *ptr++ = htonl(sp[this_sack].start_seq); 482 *ptr++ = htonl(sp[this_sack].start_seq);
483 *ptr++ = htonl(sp[this_sack].end_seq); 483 *ptr++ = htonl(sp[this_sack].end_seq);
484 } 484 }
485 485
486 tp->rx_opt.dsack = 0; 486 tp->rx_opt.dsack = 0;
487 } 487 }
488 488
489 if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) { 489 if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
490 struct tcp_fastopen_cookie *foc = opts->fastopen_cookie; 490 struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
491 491
492 *ptr++ = htonl((TCPOPT_EXP << 24) | 492 *ptr++ = htonl((TCPOPT_EXP << 24) |
493 ((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) | 493 ((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) |
494 TCPOPT_FASTOPEN_MAGIC); 494 TCPOPT_FASTOPEN_MAGIC);
495 495
496 memcpy(ptr, foc->val, foc->len); 496 memcpy(ptr, foc->val, foc->len);
497 if ((foc->len & 3) == 2) { 497 if ((foc->len & 3) == 2) {
498 u8 *align = ((u8 *)ptr) + foc->len; 498 u8 *align = ((u8 *)ptr) + foc->len;
499 align[0] = align[1] = TCPOPT_NOP; 499 align[0] = align[1] = TCPOPT_NOP;
500 } 500 }
501 ptr += (foc->len + 3) >> 2; 501 ptr += (foc->len + 3) >> 2;
502 } 502 }
503 } 503 }
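
Each of the htonl() stores above packs one 32-bit option word. As a tiny stand-alone check of the layout, the classic NOP,NOP,TIMESTAMP prefix comes out as 0x0101080a; the constants are spelled out here rather than taken from the kernel headers:

#include <stdio.h>

#define TCPOPT_NOP		1
#define TCPOPT_TIMESTAMP	8
#define TCPOLEN_TIMESTAMP	10

int main(void)
{
	unsigned int word = (TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			    (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP;

	printf("0x%08x\n", word);	/* prints 0x0101080a */
	return 0;
}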
504 504
505 /* Compute TCP options for SYN packets. This is not the final 505 /* Compute TCP options for SYN packets. This is not the final
506 * network wire format yet. 506 * network wire format yet.
507 */ 507 */
508 static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, 508 static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
509 struct tcp_out_options *opts, 509 struct tcp_out_options *opts,
510 struct tcp_md5sig_key **md5) 510 struct tcp_md5sig_key **md5)
511 { 511 {
512 struct tcp_sock *tp = tcp_sk(sk); 512 struct tcp_sock *tp = tcp_sk(sk);
513 unsigned int remaining = MAX_TCP_OPTION_SPACE; 513 unsigned int remaining = MAX_TCP_OPTION_SPACE;
514 struct tcp_fastopen_request *fastopen = tp->fastopen_req; 514 struct tcp_fastopen_request *fastopen = tp->fastopen_req;
515 515
516 #ifdef CONFIG_TCP_MD5SIG 516 #ifdef CONFIG_TCP_MD5SIG
517 *md5 = tp->af_specific->md5_lookup(sk, sk); 517 *md5 = tp->af_specific->md5_lookup(sk, sk);
518 if (*md5) { 518 if (*md5) {
519 opts->options |= OPTION_MD5; 519 opts->options |= OPTION_MD5;
520 remaining -= TCPOLEN_MD5SIG_ALIGNED; 520 remaining -= TCPOLEN_MD5SIG_ALIGNED;
521 } 521 }
522 #else 522 #else
523 *md5 = NULL; 523 *md5 = NULL;
524 #endif 524 #endif
525 525
526 /* We always get an MSS option. The option bytes which will be seen in 526 /* We always get an MSS option. The option bytes which will be seen in
527 * normal data packets should timestamps be used, must be in the MSS 527 * normal data packets should timestamps be used, must be in the MSS
528 * advertised. But we subtract them from tp->mss_cache so that 528 * advertised. But we subtract them from tp->mss_cache so that
529 * calculations in tcp_sendmsg are simpler etc. So account for this 529 * calculations in tcp_sendmsg are simpler etc. So account for this
530 * fact here if necessary. If we don't do this correctly, as a 530 * fact here if necessary. If we don't do this correctly, as a
531 * receiver we won't recognize data packets as being full sized when we 531 * receiver we won't recognize data packets as being full sized when we
532 * should, and thus we won't abide by the delayed ACK rules correctly. 532 * should, and thus we won't abide by the delayed ACK rules correctly.
533 * SACKs don't matter, we never delay an ACK when we have any of those 533 * SACKs don't matter, we never delay an ACK when we have any of those
534 * going out. */ 534 * going out. */
535 opts->mss = tcp_advertise_mss(sk); 535 opts->mss = tcp_advertise_mss(sk);
536 remaining -= TCPOLEN_MSS_ALIGNED; 536 remaining -= TCPOLEN_MSS_ALIGNED;
537 537
538 if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { 538 if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
539 opts->options |= OPTION_TS; 539 opts->options |= OPTION_TS;
540 opts->tsval = TCP_SKB_CB(skb)->when + tp->tsoffset; 540 opts->tsval = TCP_SKB_CB(skb)->when + tp->tsoffset;
541 opts->tsecr = tp->rx_opt.ts_recent; 541 opts->tsecr = tp->rx_opt.ts_recent;
542 remaining -= TCPOLEN_TSTAMP_ALIGNED; 542 remaining -= TCPOLEN_TSTAMP_ALIGNED;
543 } 543 }
544 if (likely(sysctl_tcp_window_scaling)) { 544 if (likely(sysctl_tcp_window_scaling)) {
545 opts->ws = tp->rx_opt.rcv_wscale; 545 opts->ws = tp->rx_opt.rcv_wscale;
546 opts->options |= OPTION_WSCALE; 546 opts->options |= OPTION_WSCALE;
547 remaining -= TCPOLEN_WSCALE_ALIGNED; 547 remaining -= TCPOLEN_WSCALE_ALIGNED;
548 } 548 }
549 if (likely(sysctl_tcp_sack)) { 549 if (likely(sysctl_tcp_sack)) {
550 opts->options |= OPTION_SACK_ADVERTISE; 550 opts->options |= OPTION_SACK_ADVERTISE;
551 if (unlikely(!(OPTION_TS & opts->options))) 551 if (unlikely(!(OPTION_TS & opts->options)))
552 remaining -= TCPOLEN_SACKPERM_ALIGNED; 552 remaining -= TCPOLEN_SACKPERM_ALIGNED;
553 } 553 }
554 554
555 if (fastopen && fastopen->cookie.len >= 0) { 555 if (fastopen && fastopen->cookie.len >= 0) {
556 u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len; 556 u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len;
557 need = (need + 3) & ~3U; /* Align to 32 bits */ 557 need = (need + 3) & ~3U; /* Align to 32 bits */
558 if (remaining >= need) { 558 if (remaining >= need) {
559 opts->options |= OPTION_FAST_OPEN_COOKIE; 559 opts->options |= OPTION_FAST_OPEN_COOKIE;
560 opts->fastopen_cookie = &fastopen->cookie; 560 opts->fastopen_cookie = &fastopen->cookie;
561 remaining -= need; 561 remaining -= need;
562 tp->syn_fastopen = 1; 562 tp->syn_fastopen = 1;
563 } 563 }
564 } 564 }
565 565
566 return MAX_TCP_OPTION_SPACE - remaining; 566 return MAX_TCP_OPTION_SPACE - remaining;
567 } 567 }
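
A worked byte budget for a typical SYN built by this function (timestamps, window scaling and SACK permitted, no MD5), using the aligned option sizes from the kernel headers (4, 12 and 4 bytes; SACK_PERM shares the timestamp word):

/*
 *   MAX_TCP_OPTION_SPACE                 40
 *   - MSS                                 4
 *   - timestamps (+ SACK_PERM)           12
 *   - window scale                        4
 *                                       ----
 *   left for a Fast Open cookie          20   (an 8-byte cookie needs 12)
 */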
568 568
569 /* Set up TCP options for SYN-ACKs. */ 569 /* Set up TCP options for SYN-ACKs. */
570 static unsigned int tcp_synack_options(struct sock *sk, 570 static unsigned int tcp_synack_options(struct sock *sk,
571 struct request_sock *req, 571 struct request_sock *req,
572 unsigned int mss, struct sk_buff *skb, 572 unsigned int mss, struct sk_buff *skb,
573 struct tcp_out_options *opts, 573 struct tcp_out_options *opts,
574 struct tcp_md5sig_key **md5, 574 struct tcp_md5sig_key **md5,
575 struct tcp_fastopen_cookie *foc) 575 struct tcp_fastopen_cookie *foc)
576 { 576 {
577 struct inet_request_sock *ireq = inet_rsk(req); 577 struct inet_request_sock *ireq = inet_rsk(req);
578 unsigned int remaining = MAX_TCP_OPTION_SPACE; 578 unsigned int remaining = MAX_TCP_OPTION_SPACE;
579 579
580 #ifdef CONFIG_TCP_MD5SIG 580 #ifdef CONFIG_TCP_MD5SIG
581 *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); 581 *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
582 if (*md5) { 582 if (*md5) {
583 opts->options |= OPTION_MD5; 583 opts->options |= OPTION_MD5;
584 remaining -= TCPOLEN_MD5SIG_ALIGNED; 584 remaining -= TCPOLEN_MD5SIG_ALIGNED;
585 585
586 /* We can't fit any SACK blocks in a packet with MD5 + TS 586 /* We can't fit any SACK blocks in a packet with MD5 + TS
587 * options. There was discussion about disabling SACK 587 * options. There was discussion about disabling SACK
588 * rather than TS in order to fit in better with old, 588 * rather than TS in order to fit in better with old,
589 * buggy kernels, but that was deemed to be unnecessary. 589 * buggy kernels, but that was deemed to be unnecessary.
590 */ 590 */
591 ireq->tstamp_ok &= !ireq->sack_ok; 591 ireq->tstamp_ok &= !ireq->sack_ok;
592 } 592 }
593 #else 593 #else
594 *md5 = NULL; 594 *md5 = NULL;
595 #endif 595 #endif
596 596
597 /* We always send an MSS option. */ 597 /* We always send an MSS option. */
598 opts->mss = mss; 598 opts->mss = mss;
599 remaining -= TCPOLEN_MSS_ALIGNED; 599 remaining -= TCPOLEN_MSS_ALIGNED;
600 600
601 if (likely(ireq->wscale_ok)) { 601 if (likely(ireq->wscale_ok)) {
602 opts->ws = ireq->rcv_wscale; 602 opts->ws = ireq->rcv_wscale;
603 opts->options |= OPTION_WSCALE; 603 opts->options |= OPTION_WSCALE;
604 remaining -= TCPOLEN_WSCALE_ALIGNED; 604 remaining -= TCPOLEN_WSCALE_ALIGNED;
605 } 605 }
606 if (likely(ireq->tstamp_ok)) { 606 if (likely(ireq->tstamp_ok)) {
607 opts->options |= OPTION_TS; 607 opts->options |= OPTION_TS;
608 opts->tsval = TCP_SKB_CB(skb)->when; 608 opts->tsval = TCP_SKB_CB(skb)->when;
609 opts->tsecr = req->ts_recent; 609 opts->tsecr = req->ts_recent;
610 remaining -= TCPOLEN_TSTAMP_ALIGNED; 610 remaining -= TCPOLEN_TSTAMP_ALIGNED;
611 } 611 }
612 if (likely(ireq->sack_ok)) { 612 if (likely(ireq->sack_ok)) {
613 opts->options |= OPTION_SACK_ADVERTISE; 613 opts->options |= OPTION_SACK_ADVERTISE;
614 if (unlikely(!ireq->tstamp_ok)) 614 if (unlikely(!ireq->tstamp_ok))
615 remaining -= TCPOLEN_SACKPERM_ALIGNED; 615 remaining -= TCPOLEN_SACKPERM_ALIGNED;
616 } 616 }
617 if (foc != NULL) { 617 if (foc != NULL) {
618 u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; 618 u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
619 need = (need + 3) & ~3U; /* Align to 32 bits */ 619 need = (need + 3) & ~3U; /* Align to 32 bits */
620 if (remaining >= need) { 620 if (remaining >= need) {
621 opts->options |= OPTION_FAST_OPEN_COOKIE; 621 opts->options |= OPTION_FAST_OPEN_COOKIE;
622 opts->fastopen_cookie = foc; 622 opts->fastopen_cookie = foc;
623 remaining -= need; 623 remaining -= need;
624 } 624 }
625 } 625 }
626 626
627 return MAX_TCP_OPTION_SPACE - remaining; 627 return MAX_TCP_OPTION_SPACE - remaining;
628 } 628 }
629 629
630 /* Compute TCP options for ESTABLISHED sockets. This is not the 630 /* Compute TCP options for ESTABLISHED sockets. This is not the
631 * final wire format yet. 631 * final wire format yet.
632 */ 632 */
633 static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb, 633 static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
634 struct tcp_out_options *opts, 634 struct tcp_out_options *opts,
635 struct tcp_md5sig_key **md5) 635 struct tcp_md5sig_key **md5)
636 { 636 {
637 struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; 637 struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
638 struct tcp_sock *tp = tcp_sk(sk); 638 struct tcp_sock *tp = tcp_sk(sk);
639 unsigned int size = 0; 639 unsigned int size = 0;
640 unsigned int eff_sacks; 640 unsigned int eff_sacks;
641 641
642 opts->options = 0; 642 opts->options = 0;
643 643
644 #ifdef CONFIG_TCP_MD5SIG 644 #ifdef CONFIG_TCP_MD5SIG
645 *md5 = tp->af_specific->md5_lookup(sk, sk); 645 *md5 = tp->af_specific->md5_lookup(sk, sk);
646 if (unlikely(*md5)) { 646 if (unlikely(*md5)) {
647 opts->options |= OPTION_MD5; 647 opts->options |= OPTION_MD5;
648 size += TCPOLEN_MD5SIG_ALIGNED; 648 size += TCPOLEN_MD5SIG_ALIGNED;
649 } 649 }
650 #else 650 #else
651 *md5 = NULL; 651 *md5 = NULL;
652 #endif 652 #endif
653 653
654 if (likely(tp->rx_opt.tstamp_ok)) { 654 if (likely(tp->rx_opt.tstamp_ok)) {
655 opts->options |= OPTION_TS; 655 opts->options |= OPTION_TS;
656 opts->tsval = tcb ? tcb->when + tp->tsoffset : 0; 656 opts->tsval = tcb ? tcb->when + tp->tsoffset : 0;
657 opts->tsecr = tp->rx_opt.ts_recent; 657 opts->tsecr = tp->rx_opt.ts_recent;
658 size += TCPOLEN_TSTAMP_ALIGNED; 658 size += TCPOLEN_TSTAMP_ALIGNED;
659 } 659 }
660 660
661 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; 661 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
662 if (unlikely(eff_sacks)) { 662 if (unlikely(eff_sacks)) {
663 const unsigned int remaining = MAX_TCP_OPTION_SPACE - size; 663 const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
664 opts->num_sack_blocks = 664 opts->num_sack_blocks =
665 min_t(unsigned int, eff_sacks, 665 min_t(unsigned int, eff_sacks,
666 (remaining - TCPOLEN_SACK_BASE_ALIGNED) / 666 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
667 TCPOLEN_SACK_PERBLOCK); 667 TCPOLEN_SACK_PERBLOCK);
668 size += TCPOLEN_SACK_BASE_ALIGNED + 668 size += TCPOLEN_SACK_BASE_ALIGNED +
669 opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; 669 opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
670 } 670 }
671 671
672 return size; 672 return size;
673 } 673 }
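The SACK sizing in tcp_established_options() is easy to sanity-check outside the kernel. A small sketch, assuming the conventional option sizes (40 bytes of total option space, 12 for aligned timestamps, 4 for the aligned SACK base, 8 per block); with timestamps on, it reproduces the familiar limit of three SACK blocks per segment:

#include <stdio.h>

/* assumed values mirroring the usual definitions */
#define MAX_TCP_OPTION_SPACE        40
#define TCPOLEN_TSTAMP_ALIGNED      12
#define TCPOLEN_SACK_BASE_ALIGNED    4
#define TCPOLEN_SACK_PERBLOCK        8

int main(void)
{
        unsigned int size = TCPOLEN_TSTAMP_ALIGNED;     /* timestamps enabled */
        unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
        unsigned int eff_sacks = 4;                     /* pending SACK/D-SACK blocks */
        unsigned int fit = (remaining - TCPOLEN_SACK_BASE_ALIGNED) / TCPOLEN_SACK_PERBLOCK;
        unsigned int blocks = eff_sacks < fit ? eff_sacks : fit;

        printf("%u SACK blocks fit, %u option bytes used\n", blocks,
               size + TCPOLEN_SACK_BASE_ALIGNED + blocks * TCPOLEN_SACK_PERBLOCK);
        return 0;
}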
674 674
675 675
676 /* TCP SMALL QUEUES (TSQ) 676 /* TCP SMALL QUEUES (TSQ)
677 * 677 *
678 * TSQ's goal is to keep a small number of skbs per tcp flow in tx queues (qdisc+dev) 678 * TSQ's goal is to keep a small number of skbs per tcp flow in tx queues (qdisc+dev)
679 * to reduce RTT and bufferbloat. 679 * to reduce RTT and bufferbloat.
680 * We do this using a special skb destructor (tcp_wfree). 680 * We do this using a special skb destructor (tcp_wfree).
681 * 681 *
682 * It's important that tcp_wfree() can be replaced by sock_wfree() in the event skb 682 * It's important that tcp_wfree() can be replaced by sock_wfree() in the event skb
683 * needs to be reallocated in a driver. 683 * needs to be reallocated in a driver.
684 * The invariant being skb->truesize substracted from sk->sk_wmem_alloc 684 * The invariant being skb->truesize subtracted from sk->sk_wmem_alloc
685 * 685 *
686 * Since transmit from skb destructor is forbidden, we use a tasklet 686 * Since transmit from skb destructor is forbidden, we use a tasklet
687 * to process all sockets that eventually need to send more skbs. 687 * to process all sockets that eventually need to send more skbs.
688 * We use one tasklet per cpu, with its own queue of sockets. 688 * We use one tasklet per cpu, with its own queue of sockets.
689 */ 689 */
690 struct tsq_tasklet { 690 struct tsq_tasklet {
691 struct tasklet_struct tasklet; 691 struct tasklet_struct tasklet;
692 struct list_head head; /* queue of tcp sockets */ 692 struct list_head head; /* queue of tcp sockets */
693 }; 693 };
694 static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet); 694 static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
695 695
696 static void tcp_tsq_handler(struct sock *sk) 696 static void tcp_tsq_handler(struct sock *sk)
697 { 697 {
698 if ((1 << sk->sk_state) & 698 if ((1 << sk->sk_state) &
699 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING | 699 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
700 TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) 700 TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
701 tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC); 701 tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
702 } 702 }
703 /* 703 /*
704 * One tasklest per cpu tries to send more skbs. 704 * One tasklet per cpu tries to send more skbs.
705 * We run in tasklet context but need to disable irqs when 705 * We run in tasklet context but need to disable irqs when
706 * transfering tsq->head because tcp_wfree() might 706 * transferring tsq->head because tcp_wfree() might
707 * interrupt us (non NAPI drivers) 707 * interrupt us (non NAPI drivers)
708 */ 708 */
709 static void tcp_tasklet_func(unsigned long data) 709 static void tcp_tasklet_func(unsigned long data)
710 { 710 {
711 struct tsq_tasklet *tsq = (struct tsq_tasklet *)data; 711 struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
712 LIST_HEAD(list); 712 LIST_HEAD(list);
713 unsigned long flags; 713 unsigned long flags;
714 struct list_head *q, *n; 714 struct list_head *q, *n;
715 struct tcp_sock *tp; 715 struct tcp_sock *tp;
716 struct sock *sk; 716 struct sock *sk;
717 717
718 local_irq_save(flags); 718 local_irq_save(flags);
719 list_splice_init(&tsq->head, &list); 719 list_splice_init(&tsq->head, &list);
720 local_irq_restore(flags); 720 local_irq_restore(flags);
721 721
722 list_for_each_safe(q, n, &list) { 722 list_for_each_safe(q, n, &list) {
723 tp = list_entry(q, struct tcp_sock, tsq_node); 723 tp = list_entry(q, struct tcp_sock, tsq_node);
724 list_del(&tp->tsq_node); 724 list_del(&tp->tsq_node);
725 725
726 sk = (struct sock *)tp; 726 sk = (struct sock *)tp;
727 bh_lock_sock(sk); 727 bh_lock_sock(sk);
728 728
729 if (!sock_owned_by_user(sk)) { 729 if (!sock_owned_by_user(sk)) {
730 tcp_tsq_handler(sk); 730 tcp_tsq_handler(sk);
731 } else { 731 } else {
732 /* defer the work to tcp_release_cb() */ 732 /* defer the work to tcp_release_cb() */
733 set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags); 733 set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
734 } 734 }
735 bh_unlock_sock(sk); 735 bh_unlock_sock(sk);
736 736
737 clear_bit(TSQ_QUEUED, &tp->tsq_flags); 737 clear_bit(TSQ_QUEUED, &tp->tsq_flags);
738 sk_free(sk); 738 sk_free(sk);
739 } 739 }
740 } 740 }
741 741
742 #define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \ 742 #define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \
743 (1UL << TCP_WRITE_TIMER_DEFERRED) | \ 743 (1UL << TCP_WRITE_TIMER_DEFERRED) | \
744 (1UL << TCP_DELACK_TIMER_DEFERRED) | \ 744 (1UL << TCP_DELACK_TIMER_DEFERRED) | \
745 (1UL << TCP_MTU_REDUCED_DEFERRED)) 745 (1UL << TCP_MTU_REDUCED_DEFERRED))
746 /** 746 /**
747 * tcp_release_cb - tcp release_sock() callback 747 * tcp_release_cb - tcp release_sock() callback
748 * @sk: socket 748 * @sk: socket
749 * 749 *
750 * called from release_sock() to perform protocol dependent 750 * called from release_sock() to perform protocol dependent
751 * actions before socket release. 751 * actions before socket release.
752 */ 752 */
753 void tcp_release_cb(struct sock *sk) 753 void tcp_release_cb(struct sock *sk)
754 { 754 {
755 struct tcp_sock *tp = tcp_sk(sk); 755 struct tcp_sock *tp = tcp_sk(sk);
756 unsigned long flags, nflags; 756 unsigned long flags, nflags;
757 757
758 /* perform an atomic operation only if at least one flag is set */ 758 /* perform an atomic operation only if at least one flag is set */
759 do { 759 do {
760 flags = tp->tsq_flags; 760 flags = tp->tsq_flags;
761 if (!(flags & TCP_DEFERRED_ALL)) 761 if (!(flags & TCP_DEFERRED_ALL))
762 return; 762 return;
763 nflags = flags & ~TCP_DEFERRED_ALL; 763 nflags = flags & ~TCP_DEFERRED_ALL;
764 } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags); 764 } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
765 765
766 if (flags & (1UL << TCP_TSQ_DEFERRED)) 766 if (flags & (1UL << TCP_TSQ_DEFERRED))
767 tcp_tsq_handler(sk); 767 tcp_tsq_handler(sk);
768 768
769 if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) { 769 if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
770 tcp_write_timer_handler(sk); 770 tcp_write_timer_handler(sk);
771 __sock_put(sk); 771 __sock_put(sk);
772 } 772 }
773 if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) { 773 if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
774 tcp_delack_timer_handler(sk); 774 tcp_delack_timer_handler(sk);
775 __sock_put(sk); 775 __sock_put(sk);
776 } 776 }
777 if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) { 777 if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
778 sk->sk_prot->mtu_reduced(sk); 778 sk->sk_prot->mtu_reduced(sk);
779 __sock_put(sk); 779 __sock_put(sk);
780 } 780 }
781 } 781 }
782 EXPORT_SYMBOL(tcp_release_cb); 782 EXPORT_SYMBOL(tcp_release_cb);
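The flag handling in tcp_release_cb() is a claim-and-clear loop: read the flags, return cheaply if nothing is deferred, otherwise clear the deferred bits with a compare-and-swap and act only on the bits that were actually claimed. A user-space sketch of the same pattern with C11 atomics (DEFERRED_ALL here is an arbitrary stand-in mask, not the kernel constant):

#include <stdatomic.h>
#include <stdio.h>

#define DEFERRED_ALL 0x0fUL             /* stand-in for TCP_DEFERRED_ALL */

static unsigned long claim_deferred(atomic_ulong *tsq_flags)
{
        unsigned long flags, nflags;

        do {
                flags = atomic_load(tsq_flags);
                if (!(flags & DEFERRED_ALL))
                        return 0;       /* fast path: no atomic RMW needed */
                nflags = flags & ~DEFERRED_ALL;
        } while (!atomic_compare_exchange_weak(tsq_flags, &flags, nflags));

        return flags & DEFERRED_ALL;    /* these bits are now owned by the caller */
}

int main(void)
{
        atomic_ulong tsq_flags = 0x05;

        printf("claimed %#lx, remaining %#lx\n",
               claim_deferred(&tsq_flags), (unsigned long)atomic_load(&tsq_flags));
        return 0;
}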
783 783
784 void __init tcp_tasklet_init(void) 784 void __init tcp_tasklet_init(void)
785 { 785 {
786 int i; 786 int i;
787 787
788 for_each_possible_cpu(i) { 788 for_each_possible_cpu(i) {
789 struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i); 789 struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);
790 790
791 INIT_LIST_HEAD(&tsq->head); 791 INIT_LIST_HEAD(&tsq->head);
792 tasklet_init(&tsq->tasklet, 792 tasklet_init(&tsq->tasklet,
793 tcp_tasklet_func, 793 tcp_tasklet_func,
794 (unsigned long)tsq); 794 (unsigned long)tsq);
795 } 795 }
796 } 796 }
797 797
798 /* 798 /*
799 * Write buffer destructor automatically called from kfree_skb. 799 * Write buffer destructor automatically called from kfree_skb.
800 * We cant xmit new skbs from this context, as we might already 800 * We can't xmit new skbs from this context, as we might already
801 * hold qdisc lock. 801 * hold qdisc lock.
802 */ 802 */
803 void tcp_wfree(struct sk_buff *skb) 803 void tcp_wfree(struct sk_buff *skb)
804 { 804 {
805 struct sock *sk = skb->sk; 805 struct sock *sk = skb->sk;
806 struct tcp_sock *tp = tcp_sk(sk); 806 struct tcp_sock *tp = tcp_sk(sk);
807 807
808 if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) && 808 if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
809 !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) { 809 !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
810 unsigned long flags; 810 unsigned long flags;
811 struct tsq_tasklet *tsq; 811 struct tsq_tasklet *tsq;
812 812
813 /* Keep a ref on socket. 813 /* Keep a ref on socket.
814 * This last ref will be released in tcp_tasklet_func() 814 * This last ref will be released in tcp_tasklet_func()
815 */ 815 */
816 atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc); 816 atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc);
817 817
818 /* queue this socket to tasklet queue */ 818 /* queue this socket to tasklet queue */
819 local_irq_save(flags); 819 local_irq_save(flags);
820 tsq = &__get_cpu_var(tsq_tasklet); 820 tsq = &__get_cpu_var(tsq_tasklet);
821 list_add(&tp->tsq_node, &tsq->head); 821 list_add(&tp->tsq_node, &tsq->head);
822 tasklet_schedule(&tsq->tasklet); 822 tasklet_schedule(&tsq->tasklet);
823 local_irq_restore(flags); 823 local_irq_restore(flags);
824 } else { 824 } else {
825 sock_wfree(skb); 825 sock_wfree(skb);
826 } 826 }
827 } 827 }
828 828
829 /* This routine actually transmits TCP packets queued in by 829 /* This routine actually transmits TCP packets queued in by
830 * tcp_do_sendmsg(). This is used by both the initial 830 * tcp_do_sendmsg(). This is used by both the initial
831 * transmission and possible later retransmissions. 831 * transmission and possible later retransmissions.
832 * All SKB's seen here are completely headerless. It is our 832 * All SKB's seen here are completely headerless. It is our
833 * job to build the TCP header, and pass the packet down to 833 * job to build the TCP header, and pass the packet down to
834 * IP so it can do the same plus pass the packet off to the 834 * IP so it can do the same plus pass the packet off to the
835 * device. 835 * device.
836 * 836 *
837 * We are working here with either a clone of the original 837 * We are working here with either a clone of the original
838 * SKB, or a fresh unique copy made by the retransmit engine. 838 * SKB, or a fresh unique copy made by the retransmit engine.
839 */ 839 */
840 static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, 840 static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
841 gfp_t gfp_mask) 841 gfp_t gfp_mask)
842 { 842 {
843 const struct inet_connection_sock *icsk = inet_csk(sk); 843 const struct inet_connection_sock *icsk = inet_csk(sk);
844 struct inet_sock *inet; 844 struct inet_sock *inet;
845 struct tcp_sock *tp; 845 struct tcp_sock *tp;
846 struct tcp_skb_cb *tcb; 846 struct tcp_skb_cb *tcb;
847 struct tcp_out_options opts; 847 struct tcp_out_options opts;
848 unsigned int tcp_options_size, tcp_header_size; 848 unsigned int tcp_options_size, tcp_header_size;
849 struct tcp_md5sig_key *md5; 849 struct tcp_md5sig_key *md5;
850 struct tcphdr *th; 850 struct tcphdr *th;
851 int err; 851 int err;
852 852
853 BUG_ON(!skb || !tcp_skb_pcount(skb)); 853 BUG_ON(!skb || !tcp_skb_pcount(skb));
854 854
855 if (clone_it) { 855 if (clone_it) {
856 const struct sk_buff *fclone = skb + 1; 856 const struct sk_buff *fclone = skb + 1;
857 857
858 /* If congestion control is doing timestamping, we must 858 /* If congestion control is doing timestamping, we must
859 * take such a timestamp before we potentially clone/copy. 859 * take such a timestamp before we potentially clone/copy.
860 */ 860 */
861 if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP) 861 if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
862 __net_timestamp(skb); 862 __net_timestamp(skb);
863 863
864 if (unlikely(skb->fclone == SKB_FCLONE_ORIG && 864 if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
865 fclone->fclone == SKB_FCLONE_CLONE)) 865 fclone->fclone == SKB_FCLONE_CLONE))
866 NET_INC_STATS_BH(sock_net(sk), 866 NET_INC_STATS_BH(sock_net(sk),
867 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); 867 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
868 868
869 if (unlikely(skb_cloned(skb))) 869 if (unlikely(skb_cloned(skb)))
870 skb = pskb_copy(skb, gfp_mask); 870 skb = pskb_copy(skb, gfp_mask);
871 else 871 else
872 skb = skb_clone(skb, gfp_mask); 872 skb = skb_clone(skb, gfp_mask);
873 if (unlikely(!skb)) 873 if (unlikely(!skb))
874 return -ENOBUFS; 874 return -ENOBUFS;
875 } 875 }
876 876
877 inet = inet_sk(sk); 877 inet = inet_sk(sk);
878 tp = tcp_sk(sk); 878 tp = tcp_sk(sk);
879 tcb = TCP_SKB_CB(skb); 879 tcb = TCP_SKB_CB(skb);
880 memset(&opts, 0, sizeof(opts)); 880 memset(&opts, 0, sizeof(opts));
881 881
882 if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) 882 if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
883 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); 883 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
884 else 884 else
885 tcp_options_size = tcp_established_options(sk, skb, &opts, 885 tcp_options_size = tcp_established_options(sk, skb, &opts,
886 &md5); 886 &md5);
887 tcp_header_size = tcp_options_size + sizeof(struct tcphdr); 887 tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
888 888
889 if (tcp_packets_in_flight(tp) == 0) 889 if (tcp_packets_in_flight(tp) == 0)
890 tcp_ca_event(sk, CA_EVENT_TX_START); 890 tcp_ca_event(sk, CA_EVENT_TX_START);
891 891
892 /* if no packet is in qdisc/device queue, then allow XPS to select 892 /* if no packet is in qdisc/device queue, then allow XPS to select
893 * another queue. 893 * another queue.
894 */ 894 */
895 skb->ooo_okay = sk_wmem_alloc_get(sk) == 0; 895 skb->ooo_okay = sk_wmem_alloc_get(sk) == 0;
896 896
897 skb_push(skb, tcp_header_size); 897 skb_push(skb, tcp_header_size);
898 skb_reset_transport_header(skb); 898 skb_reset_transport_header(skb);
899 899
900 skb_orphan(skb); 900 skb_orphan(skb);
901 skb->sk = sk; 901 skb->sk = sk;
902 skb->destructor = tcp_wfree; 902 skb->destructor = tcp_wfree;
903 atomic_add(skb->truesize, &sk->sk_wmem_alloc); 903 atomic_add(skb->truesize, &sk->sk_wmem_alloc);
904 904
905 /* Build TCP header and checksum it. */ 905 /* Build TCP header and checksum it. */
906 th = tcp_hdr(skb); 906 th = tcp_hdr(skb);
907 th->source = inet->inet_sport; 907 th->source = inet->inet_sport;
908 th->dest = inet->inet_dport; 908 th->dest = inet->inet_dport;
909 th->seq = htonl(tcb->seq); 909 th->seq = htonl(tcb->seq);
910 th->ack_seq = htonl(tp->rcv_nxt); 910 th->ack_seq = htonl(tp->rcv_nxt);
911 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | 911 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
912 tcb->tcp_flags); 912 tcb->tcp_flags);
913 913
914 if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) { 914 if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
915 /* RFC1323: The window in SYN & SYN/ACK segments 915 /* RFC1323: The window in SYN & SYN/ACK segments
916 * is never scaled. 916 * is never scaled.
917 */ 917 */
918 th->window = htons(min(tp->rcv_wnd, 65535U)); 918 th->window = htons(min(tp->rcv_wnd, 65535U));
919 } else { 919 } else {
920 th->window = htons(tcp_select_window(sk)); 920 th->window = htons(tcp_select_window(sk));
921 } 921 }
922 th->check = 0; 922 th->check = 0;
923 th->urg_ptr = 0; 923 th->urg_ptr = 0;
924 924
925 /* The urg_mode check is necessary during a below snd_una win probe */ 925 /* The urg_mode check is necessary during a below snd_una win probe */
926 if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) { 926 if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
927 if (before(tp->snd_up, tcb->seq + 0x10000)) { 927 if (before(tp->snd_up, tcb->seq + 0x10000)) {
928 th->urg_ptr = htons(tp->snd_up - tcb->seq); 928 th->urg_ptr = htons(tp->snd_up - tcb->seq);
929 th->urg = 1; 929 th->urg = 1;
930 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) { 930 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
931 th->urg_ptr = htons(0xFFFF); 931 th->urg_ptr = htons(0xFFFF);
932 th->urg = 1; 932 th->urg = 1;
933 } 933 }
934 } 934 }
935 935
936 tcp_options_write((__be32 *)(th + 1), tp, &opts); 936 tcp_options_write((__be32 *)(th + 1), tp, &opts);
937 if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0)) 937 if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
938 TCP_ECN_send(sk, skb, tcp_header_size); 938 TCP_ECN_send(sk, skb, tcp_header_size);
939 939
940 #ifdef CONFIG_TCP_MD5SIG 940 #ifdef CONFIG_TCP_MD5SIG
941 /* Calculate the MD5 hash, as we have all we need now */ 941 /* Calculate the MD5 hash, as we have all we need now */
942 if (md5) { 942 if (md5) {
943 sk_nocaps_add(sk, NETIF_F_GSO_MASK); 943 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
944 tp->af_specific->calc_md5_hash(opts.hash_location, 944 tp->af_specific->calc_md5_hash(opts.hash_location,
945 md5, sk, NULL, skb); 945 md5, sk, NULL, skb);
946 } 946 }
947 #endif 947 #endif
948 948
949 icsk->icsk_af_ops->send_check(sk, skb); 949 icsk->icsk_af_ops->send_check(sk, skb);
950 950
951 if (likely(tcb->tcp_flags & TCPHDR_ACK)) 951 if (likely(tcb->tcp_flags & TCPHDR_ACK))
952 tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); 952 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
953 953
954 if (skb->len != tcp_header_size) 954 if (skb->len != tcp_header_size)
955 tcp_event_data_sent(tp, sk); 955 tcp_event_data_sent(tp, sk);
956 956
957 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) 957 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
958 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, 958 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
959 tcp_skb_pcount(skb)); 959 tcp_skb_pcount(skb));
960 960
961 err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl); 961 err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
962 if (likely(err <= 0)) 962 if (likely(err <= 0))
963 return err; 963 return err;
964 964
965 tcp_enter_cwr(sk, 1); 965 tcp_enter_cwr(sk, 1);
966 966
967 return net_xmit_eval(err); 967 return net_xmit_eval(err);
968 } 968 }
969 969
970 /* This routine just queues the buffer for sending. 970 /* This routine just queues the buffer for sending.
971 * 971 *
972 * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, 972 * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
973 * otherwise socket can stall. 973 * otherwise socket can stall.
974 */ 974 */
975 static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) 975 static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
976 { 976 {
977 struct tcp_sock *tp = tcp_sk(sk); 977 struct tcp_sock *tp = tcp_sk(sk);
978 978
979 /* Advance write_seq and place onto the write_queue. */ 979 /* Advance write_seq and place onto the write_queue. */
980 tp->write_seq = TCP_SKB_CB(skb)->end_seq; 980 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
981 skb_header_release(skb); 981 skb_header_release(skb);
982 tcp_add_write_queue_tail(sk, skb); 982 tcp_add_write_queue_tail(sk, skb);
983 sk->sk_wmem_queued += skb->truesize; 983 sk->sk_wmem_queued += skb->truesize;
984 sk_mem_charge(sk, skb->truesize); 984 sk_mem_charge(sk, skb->truesize);
985 } 985 }
986 986
987 /* Initialize TSO segments for a packet. */ 987 /* Initialize TSO segments for a packet. */
988 static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, 988 static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
989 unsigned int mss_now) 989 unsigned int mss_now)
990 { 990 {
991 struct skb_shared_info *shinfo = skb_shinfo(skb); 991 struct skb_shared_info *shinfo = skb_shinfo(skb);
992 992
993 /* Make sure we own this skb before messing gso_size/gso_segs */ 993 /* Make sure we own this skb before messing gso_size/gso_segs */
994 WARN_ON_ONCE(skb_cloned(skb)); 994 WARN_ON_ONCE(skb_cloned(skb));
995 995
996 if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) { 996 if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
997 /* Avoid the costly divide in the normal 997 /* Avoid the costly divide in the normal
998 * non-TSO case. 998 * non-TSO case.
999 */ 999 */
1000 shinfo->gso_segs = 1; 1000 shinfo->gso_segs = 1;
1001 shinfo->gso_size = 0; 1001 shinfo->gso_size = 0;
1002 shinfo->gso_type = 0; 1002 shinfo->gso_type = 0;
1003 } else { 1003 } else {
1004 shinfo->gso_segs = DIV_ROUND_UP(skb->len, mss_now); 1004 shinfo->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
1005 shinfo->gso_size = mss_now; 1005 shinfo->gso_size = mss_now;
1006 shinfo->gso_type = sk->sk_gso_type; 1006 shinfo->gso_type = sk->sk_gso_type;
1007 } 1007 }
1008 } 1008 }
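The gso_segs value above is just a ceiling division of the payload by the current MSS, with the sub-MSS case short-circuited to a single segment. A quick sketch (the MSS of 1448 is only an example, as for a connection with timestamps):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))      /* as in the kernel */

int main(void)
{
        unsigned int mss_now = 1448;
        unsigned int lens[] = { 1000, 1448, 1449, 64000 };
        unsigned int i;

        for (i = 0; i < sizeof(lens) / sizeof(lens[0]); i++)
                printf("len %5u -> gso_segs %u\n", lens[i],
                       lens[i] <= mss_now ? 1 : DIV_ROUND_UP(lens[i], mss_now));
        return 0;
}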
1009 1009
1010 /* When a modification to fackets_out becomes necessary, we need to check 1010 /* When a modification to fackets_out becomes necessary, we need to check
1011 * whether the skb is counted in fackets_out or not. 1011 * whether the skb is counted in fackets_out or not.
1012 */ 1012 */
1013 static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb, 1013 static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
1014 int decr) 1014 int decr)
1015 { 1015 {
1016 struct tcp_sock *tp = tcp_sk(sk); 1016 struct tcp_sock *tp = tcp_sk(sk);
1017 1017
1018 if (!tp->sacked_out || tcp_is_reno(tp)) 1018 if (!tp->sacked_out || tcp_is_reno(tp))
1019 return; 1019 return;
1020 1020
1021 if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq)) 1021 if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
1022 tp->fackets_out -= decr; 1022 tp->fackets_out -= decr;
1023 } 1023 }
1024 1024
1025 /* Pcount in the middle of the write queue got changed; we need to do various 1025 /* Pcount in the middle of the write queue got changed; we need to do various
1026 * tweaks to fix the counters 1026 * tweaks to fix the counters
1027 */ 1027 */
1028 static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr) 1028 static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
1029 { 1029 {
1030 struct tcp_sock *tp = tcp_sk(sk); 1030 struct tcp_sock *tp = tcp_sk(sk);
1031 1031
1032 tp->packets_out -= decr; 1032 tp->packets_out -= decr;
1033 1033
1034 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) 1034 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1035 tp->sacked_out -= decr; 1035 tp->sacked_out -= decr;
1036 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) 1036 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
1037 tp->retrans_out -= decr; 1037 tp->retrans_out -= decr;
1038 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) 1038 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
1039 tp->lost_out -= decr; 1039 tp->lost_out -= decr;
1040 1040
1041 /* Reno case is special. Sigh... */ 1041 /* Reno case is special. Sigh... */
1042 if (tcp_is_reno(tp) && decr > 0) 1042 if (tcp_is_reno(tp) && decr > 0)
1043 tp->sacked_out -= min_t(u32, tp->sacked_out, decr); 1043 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
1044 1044
1045 tcp_adjust_fackets_out(sk, skb, decr); 1045 tcp_adjust_fackets_out(sk, skb, decr);
1046 1046
1047 if (tp->lost_skb_hint && 1047 if (tp->lost_skb_hint &&
1048 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) && 1048 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
1049 (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))) 1049 (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
1050 tp->lost_cnt_hint -= decr; 1050 tp->lost_cnt_hint -= decr;
1051 1051
1052 tcp_verify_left_out(tp); 1052 tcp_verify_left_out(tp);
1053 } 1053 }
1054 1054
1055 /* Function to create two new TCP segments. Shrinks the given segment 1055 /* Function to create two new TCP segments. Shrinks the given segment
1056 * to the specified size and appends a new segment with the rest of the 1056 * to the specified size and appends a new segment with the rest of the
1057 * packet to the list. This won't be called frequently, I hope. 1057 * packet to the list. This won't be called frequently, I hope.
1058 * Remember, these are still headerless SKBs at this point. 1058 * Remember, these are still headerless SKBs at this point.
1059 */ 1059 */
1060 int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, 1060 int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1061 unsigned int mss_now) 1061 unsigned int mss_now)
1062 { 1062 {
1063 struct tcp_sock *tp = tcp_sk(sk); 1063 struct tcp_sock *tp = tcp_sk(sk);
1064 struct sk_buff *buff; 1064 struct sk_buff *buff;
1065 int nsize, old_factor; 1065 int nsize, old_factor;
1066 int nlen; 1066 int nlen;
1067 u8 flags; 1067 u8 flags;
1068 1068
1069 if (WARN_ON(len > skb->len)) 1069 if (WARN_ON(len > skb->len))
1070 return -EINVAL; 1070 return -EINVAL;
1071 1071
1072 nsize = skb_headlen(skb) - len; 1072 nsize = skb_headlen(skb) - len;
1073 if (nsize < 0) 1073 if (nsize < 0)
1074 nsize = 0; 1074 nsize = 0;
1075 1075
1076 if (skb_unclone(skb, GFP_ATOMIC)) 1076 if (skb_unclone(skb, GFP_ATOMIC))
1077 return -ENOMEM; 1077 return -ENOMEM;
1078 1078
1079 /* Get a new skb... force flag on. */ 1079 /* Get a new skb... force flag on. */
1080 buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC); 1080 buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
1081 if (buff == NULL) 1081 if (buff == NULL)
1082 return -ENOMEM; /* We'll just try again later. */ 1082 return -ENOMEM; /* We'll just try again later. */
1083 1083
1084 sk->sk_wmem_queued += buff->truesize; 1084 sk->sk_wmem_queued += buff->truesize;
1085 sk_mem_charge(sk, buff->truesize); 1085 sk_mem_charge(sk, buff->truesize);
1086 nlen = skb->len - len - nsize; 1086 nlen = skb->len - len - nsize;
1087 buff->truesize += nlen; 1087 buff->truesize += nlen;
1088 skb->truesize -= nlen; 1088 skb->truesize -= nlen;
1089 1089
1090 /* Correct the sequence numbers. */ 1090 /* Correct the sequence numbers. */
1091 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len; 1091 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1092 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; 1092 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1093 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; 1093 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1094 1094
1095 /* PSH and FIN should only be set in the second packet. */ 1095 /* PSH and FIN should only be set in the second packet. */
1096 flags = TCP_SKB_CB(skb)->tcp_flags; 1096 flags = TCP_SKB_CB(skb)->tcp_flags;
1097 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); 1097 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1098 TCP_SKB_CB(buff)->tcp_flags = flags; 1098 TCP_SKB_CB(buff)->tcp_flags = flags;
1099 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; 1099 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1100 1100
1101 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { 1101 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
1102 /* Copy and checksum data tail into the new buffer. */ 1102 /* Copy and checksum data tail into the new buffer. */
1103 buff->csum = csum_partial_copy_nocheck(skb->data + len, 1103 buff->csum = csum_partial_copy_nocheck(skb->data + len,
1104 skb_put(buff, nsize), 1104 skb_put(buff, nsize),
1105 nsize, 0); 1105 nsize, 0);
1106 1106
1107 skb_trim(skb, len); 1107 skb_trim(skb, len);
1108 1108
1109 skb->csum = csum_block_sub(skb->csum, buff->csum, len); 1109 skb->csum = csum_block_sub(skb->csum, buff->csum, len);
1110 } else { 1110 } else {
1111 skb->ip_summed = CHECKSUM_PARTIAL; 1111 skb->ip_summed = CHECKSUM_PARTIAL;
1112 skb_split(skb, buff, len); 1112 skb_split(skb, buff, len);
1113 } 1113 }
1114 1114
1115 buff->ip_summed = skb->ip_summed; 1115 buff->ip_summed = skb->ip_summed;
1116 1116
1117 /* Looks stupid, but our code really uses the 'when' field of 1117 /* Looks stupid, but our code really uses the 'when' field of
1118 * skbs which it has never sent before. --ANK 1118 * skbs which it has never sent before. --ANK
1119 */ 1119 */
1120 TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; 1120 TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
1121 buff->tstamp = skb->tstamp; 1121 buff->tstamp = skb->tstamp;
1122 1122
1123 old_factor = tcp_skb_pcount(skb); 1123 old_factor = tcp_skb_pcount(skb);
1124 1124
1125 /* Fix up tso_factor for both original and new SKB. */ 1125 /* Fix up tso_factor for both original and new SKB. */
1126 tcp_set_skb_tso_segs(sk, skb, mss_now); 1126 tcp_set_skb_tso_segs(sk, skb, mss_now);
1127 tcp_set_skb_tso_segs(sk, buff, mss_now); 1127 tcp_set_skb_tso_segs(sk, buff, mss_now);
1128 1128
1129 /* If this packet has been sent out already, we must 1129 /* If this packet has been sent out already, we must
1130 * adjust the various packet counters. 1130 * adjust the various packet counters.
1131 */ 1131 */
1132 if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) { 1132 if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
1133 int diff = old_factor - tcp_skb_pcount(skb) - 1133 int diff = old_factor - tcp_skb_pcount(skb) -
1134 tcp_skb_pcount(buff); 1134 tcp_skb_pcount(buff);
1135 1135
1136 if (diff) 1136 if (diff)
1137 tcp_adjust_pcount(sk, skb, diff); 1137 tcp_adjust_pcount(sk, skb, diff);
1138 } 1138 }
1139 1139
1140 /* Link BUFF into the send queue. */ 1140 /* Link BUFF into the send queue. */
1141 skb_header_release(buff); 1141 skb_header_release(buff);
1142 tcp_insert_write_queue_after(skb, buff, sk); 1142 tcp_insert_write_queue_after(skb, buff, sk);
1143 1143
1144 return 0; 1144 return 0;
1145 } 1145 }
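The sequence bookkeeping in tcp_fragment() is easiest to follow with concrete numbers. A sketch using a hypothetical segment covering [1000, 4000) split at len = 1460 (values chosen only for illustration):

#include <stdio.h>

int main(void)
{
        unsigned int seq = 1000, end_seq = 4000, len = 1460;

        unsigned int buff_seq = seq + len;      /* new skb starts at the cut */
        unsigned int buff_end = end_seq;        /* and inherits the old end */

        end_seq = buff_seq;                     /* original skb now ends at the cut */

        printf("skb  [%u, %u)\n", seq, end_seq);        /* [1000, 2460) */
        printf("buff [%u, %u)\n", buff_seq, buff_end);  /* [2460, 4000) */
        return 0;
}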
1146 1146
1147 /* This is similar to __pskb_pull_head() (it will go to core/skbuff.c 1147 /* This is similar to __pskb_pull_head() (it will go to core/skbuff.c
1148 * eventually). The difference is that the pulled data is not copied, but 1148 * eventually). The difference is that the pulled data is not copied, but
1149 * immediately discarded. 1149 * immediately discarded.
1150 */ 1150 */
1151 static void __pskb_trim_head(struct sk_buff *skb, int len) 1151 static void __pskb_trim_head(struct sk_buff *skb, int len)
1152 { 1152 {
1153 struct skb_shared_info *shinfo; 1153 struct skb_shared_info *shinfo;
1154 int i, k, eat; 1154 int i, k, eat;
1155 1155
1156 eat = min_t(int, len, skb_headlen(skb)); 1156 eat = min_t(int, len, skb_headlen(skb));
1157 if (eat) { 1157 if (eat) {
1158 __skb_pull(skb, eat); 1158 __skb_pull(skb, eat);
1159 len -= eat; 1159 len -= eat;
1160 if (!len) 1160 if (!len)
1161 return; 1161 return;
1162 } 1162 }
1163 eat = len; 1163 eat = len;
1164 k = 0; 1164 k = 0;
1165 shinfo = skb_shinfo(skb); 1165 shinfo = skb_shinfo(skb);
1166 for (i = 0; i < shinfo->nr_frags; i++) { 1166 for (i = 0; i < shinfo->nr_frags; i++) {
1167 int size = skb_frag_size(&shinfo->frags[i]); 1167 int size = skb_frag_size(&shinfo->frags[i]);
1168 1168
1169 if (size <= eat) { 1169 if (size <= eat) {
1170 skb_frag_unref(skb, i); 1170 skb_frag_unref(skb, i);
1171 eat -= size; 1171 eat -= size;
1172 } else { 1172 } else {
1173 shinfo->frags[k] = shinfo->frags[i]; 1173 shinfo->frags[k] = shinfo->frags[i];
1174 if (eat) { 1174 if (eat) {
1175 shinfo->frags[k].page_offset += eat; 1175 shinfo->frags[k].page_offset += eat;
1176 skb_frag_size_sub(&shinfo->frags[k], eat); 1176 skb_frag_size_sub(&shinfo->frags[k], eat);
1177 eat = 0; 1177 eat = 0;
1178 } 1178 }
1179 k++; 1179 k++;
1180 } 1180 }
1181 } 1181 }
1182 shinfo->nr_frags = k; 1182 shinfo->nr_frags = k;
1183 1183
1184 skb_reset_tail_pointer(skb); 1184 skb_reset_tail_pointer(skb);
1185 skb->data_len -= len; 1185 skb->data_len -= len;
1186 skb->len = skb->data_len; 1186 skb->len = skb->data_len;
1187 } 1187 }
1188 1188
1189 /* Remove acked data from a packet in the transmit queue. */ 1189 /* Remove acked data from a packet in the transmit queue. */
1190 int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) 1190 int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1191 { 1191 {
1192 if (skb_unclone(skb, GFP_ATOMIC)) 1192 if (skb_unclone(skb, GFP_ATOMIC))
1193 return -ENOMEM; 1193 return -ENOMEM;
1194 1194
1195 __pskb_trim_head(skb, len); 1195 __pskb_trim_head(skb, len);
1196 1196
1197 TCP_SKB_CB(skb)->seq += len; 1197 TCP_SKB_CB(skb)->seq += len;
1198 skb->ip_summed = CHECKSUM_PARTIAL; 1198 skb->ip_summed = CHECKSUM_PARTIAL;
1199 1199
1200 skb->truesize -= len; 1200 skb->truesize -= len;
1201 sk->sk_wmem_queued -= len; 1201 sk->sk_wmem_queued -= len;
1202 sk_mem_uncharge(sk, len); 1202 sk_mem_uncharge(sk, len);
1203 sock_set_flag(sk, SOCK_QUEUE_SHRUNK); 1203 sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
1204 1204
1205 /* Any change of skb->len requires recalculation of tso factor. */ 1205 /* Any change of skb->len requires recalculation of tso factor. */
1206 if (tcp_skb_pcount(skb) > 1) 1206 if (tcp_skb_pcount(skb) > 1)
1207 tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb)); 1207 tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
1208 1208
1209 return 0; 1209 return 0;
1210 } 1210 }
1211 1211
1212 /* Calculate MSS, not accounting for any TCP options. */ 1212 /* Calculate MSS, not accounting for any TCP options. */
1213 static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) 1213 static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
1214 { 1214 {
1215 const struct tcp_sock *tp = tcp_sk(sk); 1215 const struct tcp_sock *tp = tcp_sk(sk);
1216 const struct inet_connection_sock *icsk = inet_csk(sk); 1216 const struct inet_connection_sock *icsk = inet_csk(sk);
1217 int mss_now; 1217 int mss_now;
1218 1218
1219 /* Calculate base mss without TCP options: 1219 /* Calculate base mss without TCP options:
1220 It is MMS_S - sizeof(tcphdr) of rfc1122 1220 It is MMS_S - sizeof(tcphdr) of rfc1122
1221 */ 1221 */
1222 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr); 1222 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
1223 1223
1224 /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */ 1224 /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */
1225 if (icsk->icsk_af_ops->net_frag_header_len) { 1225 if (icsk->icsk_af_ops->net_frag_header_len) {
1226 const struct dst_entry *dst = __sk_dst_get(sk); 1226 const struct dst_entry *dst = __sk_dst_get(sk);
1227 1227
1228 if (dst && dst_allfrag(dst)) 1228 if (dst && dst_allfrag(dst))
1229 mss_now -= icsk->icsk_af_ops->net_frag_header_len; 1229 mss_now -= icsk->icsk_af_ops->net_frag_header_len;
1230 } 1230 }
1231 1231
1232 /* Clamp it (mss_clamp does not include tcp options) */ 1232 /* Clamp it (mss_clamp does not include tcp options) */
1233 if (mss_now > tp->rx_opt.mss_clamp) 1233 if (mss_now > tp->rx_opt.mss_clamp)
1234 mss_now = tp->rx_opt.mss_clamp; 1234 mss_now = tp->rx_opt.mss_clamp;
1235 1235
1236 /* Now subtract optional transport overhead */ 1236 /* Now subtract optional transport overhead */
1237 mss_now -= icsk->icsk_ext_hdr_len; 1237 mss_now -= icsk->icsk_ext_hdr_len;
1238 1238
1239 /* Then reserve room for full set of TCP options and 8 bytes of data */ 1239 /* Then reserve room for full set of TCP options and 8 bytes of data */
1240 if (mss_now < 48) 1240 if (mss_now < 48)
1241 mss_now = 48; 1241 mss_now = 48;
1242 return mss_now; 1242 return mss_now;
1243 } 1243 }
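Plugging ordinary IPv4-over-Ethernet numbers into __tcp_mtu_to_mss() makes the bookkeeping concrete. A sketch with assumed values (1500-byte path MTU, 20-byte IPv4 header, no extension headers, default clamp), not read from a live socket:

#include <stdio.h>

int main(void)
{
        int pmtu = 1500;                /* assumed Ethernet path MTU */
        int net_header_len = 20;        /* IPv4 without options */
        int ext_hdr_len = 0;            /* no IPsec etc. */
        int mss_clamp = 65535;
        int mss_now = pmtu - net_header_len - 20;       /* 20 == sizeof(struct tcphdr) */

        if (mss_now > mss_clamp)
                mss_now = mss_clamp;
        mss_now -= ext_hdr_len;
        if (mss_now < 48)               /* room for full options + 8 data bytes */
                mss_now = 48;

        printf("base mss = %d\n", mss_now);     /* 1460 for plain IPv4/Ethernet */
        return 0;
}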
1244 1244
1245 /* Calculate MSS. Not accounting for SACKs here. */ 1245 /* Calculate MSS. Not accounting for SACKs here. */
1246 int tcp_mtu_to_mss(struct sock *sk, int pmtu) 1246 int tcp_mtu_to_mss(struct sock *sk, int pmtu)
1247 { 1247 {
1248 /* Subtract TCP options size, not including SACKs */ 1248 /* Subtract TCP options size, not including SACKs */
1249 return __tcp_mtu_to_mss(sk, pmtu) - 1249 return __tcp_mtu_to_mss(sk, pmtu) -
1250 (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr)); 1250 (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr));
1251 } 1251 }
1252 1252
1253 /* Inverse of above */ 1253 /* Inverse of above */
1254 int tcp_mss_to_mtu(struct sock *sk, int mss) 1254 int tcp_mss_to_mtu(struct sock *sk, int mss)
1255 { 1255 {
1256 const struct tcp_sock *tp = tcp_sk(sk); 1256 const struct tcp_sock *tp = tcp_sk(sk);
1257 const struct inet_connection_sock *icsk = inet_csk(sk); 1257 const struct inet_connection_sock *icsk = inet_csk(sk);
1258 int mtu; 1258 int mtu;
1259 1259
1260 mtu = mss + 1260 mtu = mss +
1261 tp->tcp_header_len + 1261 tp->tcp_header_len +
1262 icsk->icsk_ext_hdr_len + 1262 icsk->icsk_ext_hdr_len +
1263 icsk->icsk_af_ops->net_header_len; 1263 icsk->icsk_af_ops->net_header_len;
1264 1264
1265 /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */ 1265 /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */
1266 if (icsk->icsk_af_ops->net_frag_header_len) { 1266 if (icsk->icsk_af_ops->net_frag_header_len) {
1267 const struct dst_entry *dst = __sk_dst_get(sk); 1267 const struct dst_entry *dst = __sk_dst_get(sk);
1268 1268
1269 if (dst && dst_allfrag(dst)) 1269 if (dst && dst_allfrag(dst))
1270 mtu += icsk->icsk_af_ops->net_frag_header_len; 1270 mtu += icsk->icsk_af_ops->net_frag_header_len;
1271 } 1271 }
1272 return mtu; 1272 return mtu;
1273 } 1273 }
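Because tcp_mtu_to_mss() subtracts the connection's fixed option overhead (tp->tcp_header_len minus the bare header) on top of the base calculation, and tcp_mss_to_mtu() adds the same pieces back, the two functions are inverses. A sketch simplified under assumed values (timestamps negotiated, so tcp_header_len is 32; no clamp or extension headers in play):

#include <stdio.h>

int main(void)
{
        int pmtu = 1500, net_hdr = 20, ext_hdr = 0;
        int tcp_header_len = 32;        /* 20-byte header + 12 for timestamps */

        /* forward direction, simplified from tcp_mtu_to_mss() */
        int mss = pmtu - net_hdr - tcp_header_len;              /* 1448 */
        /* inverse direction, as in tcp_mss_to_mtu() */
        int mtu = mss + tcp_header_len + ext_hdr + net_hdr;     /* back to 1500 */

        printf("mss = %d, mtu back = %d\n", mss, mtu);
        return 0;
}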
1274 1274
1275 /* MTU probing init per socket */ 1275 /* MTU probing init per socket */
1276 void tcp_mtup_init(struct sock *sk) 1276 void tcp_mtup_init(struct sock *sk)
1277 { 1277 {
1278 struct tcp_sock *tp = tcp_sk(sk); 1278 struct tcp_sock *tp = tcp_sk(sk);
1279 struct inet_connection_sock *icsk = inet_csk(sk); 1279 struct inet_connection_sock *icsk = inet_csk(sk);
1280 1280
1281 icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1; 1281 icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
1282 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) + 1282 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
1283 icsk->icsk_af_ops->net_header_len; 1283 icsk->icsk_af_ops->net_header_len;
1284 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss); 1284 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
1285 icsk->icsk_mtup.probe_size = 0; 1285 icsk->icsk_mtup.probe_size = 0;
1286 } 1286 }
1287 EXPORT_SYMBOL(tcp_mtup_init); 1287 EXPORT_SYMBOL(tcp_mtup_init);
1288 1288
1289 /* This function synchronizes snd mss to the current pmtu/exthdr set. 1289 /* This function synchronizes snd mss to the current pmtu/exthdr set.
1290 1290
1291 tp->rx_opt.user_mss is the mss set by the user via TCP_MAXSEG. It does NOT account 1291 tp->rx_opt.user_mss is the mss set by the user via TCP_MAXSEG. It does NOT account
1292 for TCP options, but includes only the bare TCP header. 1292 for TCP options, but includes only the bare TCP header.
1293 1293
1294 tp->rx_opt.mss_clamp is mss negotiated at connection setup. 1294 tp->rx_opt.mss_clamp is mss negotiated at connection setup.
1295 It is minimum of user_mss and mss received with SYN. 1295 It is minimum of user_mss and mss received with SYN.
1296 It also does not include TCP options. 1296 It also does not include TCP options.
1297 1297
1298 inet_csk(sk)->icsk_pmtu_cookie is last pmtu, seen by this function. 1298 inet_csk(sk)->icsk_pmtu_cookie is last pmtu, seen by this function.
1299 1299
1300 tp->mss_cache is current effective sending mss, including 1300 tp->mss_cache is current effective sending mss, including
1301 all tcp options except for SACKs. It is evaluated, 1301 all tcp options except for SACKs. It is evaluated,
1302 taking into account current pmtu, but never exceeds 1302 taking into account current pmtu, but never exceeds
1303 tp->rx_opt.mss_clamp. 1303 tp->rx_opt.mss_clamp.
1304 1304
1305 NOTE1. rfc1122 clearly states that advertised MSS 1305 NOTE1. rfc1122 clearly states that advertised MSS
1306 DOES NOT include either tcp or ip options. 1306 DOES NOT include either tcp or ip options.
1307 1307
1308 NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache 1308 NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache
1309 are READ ONLY outside this function. --ANK (980731) 1309 are READ ONLY outside this function. --ANK (980731)
1310 */ 1310 */
1311 unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) 1311 unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
1312 { 1312 {
1313 struct tcp_sock *tp = tcp_sk(sk); 1313 struct tcp_sock *tp = tcp_sk(sk);
1314 struct inet_connection_sock *icsk = inet_csk(sk); 1314 struct inet_connection_sock *icsk = inet_csk(sk);
1315 int mss_now; 1315 int mss_now;
1316 1316
1317 if (icsk->icsk_mtup.search_high > pmtu) 1317 if (icsk->icsk_mtup.search_high > pmtu)
1318 icsk->icsk_mtup.search_high = pmtu; 1318 icsk->icsk_mtup.search_high = pmtu;
1319 1319
1320 mss_now = tcp_mtu_to_mss(sk, pmtu); 1320 mss_now = tcp_mtu_to_mss(sk, pmtu);
1321 mss_now = tcp_bound_to_half_wnd(tp, mss_now); 1321 mss_now = tcp_bound_to_half_wnd(tp, mss_now);
1322 1322
1323 /* And store cached results */ 1323 /* And store cached results */
1324 icsk->icsk_pmtu_cookie = pmtu; 1324 icsk->icsk_pmtu_cookie = pmtu;
1325 if (icsk->icsk_mtup.enabled) 1325 if (icsk->icsk_mtup.enabled)
1326 mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low)); 1326 mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low));
1327 tp->mss_cache = mss_now; 1327 tp->mss_cache = mss_now;
1328 1328
1329 return mss_now; 1329 return mss_now;
1330 } 1330 }
1331 EXPORT_SYMBOL(tcp_sync_mss); 1331 EXPORT_SYMBOL(tcp_sync_mss);
1332 1332
1333 /* Compute the current effective MSS, taking SACKs and IP options, 1333 /* Compute the current effective MSS, taking SACKs and IP options,
1334 * and even PMTU discovery events into account. 1334 * and even PMTU discovery events into account.
1335 */ 1335 */
1336 unsigned int tcp_current_mss(struct sock *sk) 1336 unsigned int tcp_current_mss(struct sock *sk)
1337 { 1337 {
1338 const struct tcp_sock *tp = tcp_sk(sk); 1338 const struct tcp_sock *tp = tcp_sk(sk);
1339 const struct dst_entry *dst = __sk_dst_get(sk); 1339 const struct dst_entry *dst = __sk_dst_get(sk);
1340 u32 mss_now; 1340 u32 mss_now;
1341 unsigned int header_len; 1341 unsigned int header_len;
1342 struct tcp_out_options opts; 1342 struct tcp_out_options opts;
1343 struct tcp_md5sig_key *md5; 1343 struct tcp_md5sig_key *md5;
1344 1344
1345 mss_now = tp->mss_cache; 1345 mss_now = tp->mss_cache;
1346 1346
1347 if (dst) { 1347 if (dst) {
1348 u32 mtu = dst_mtu(dst); 1348 u32 mtu = dst_mtu(dst);
1349 if (mtu != inet_csk(sk)->icsk_pmtu_cookie) 1349 if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
1350 mss_now = tcp_sync_mss(sk, mtu); 1350 mss_now = tcp_sync_mss(sk, mtu);
1351 } 1351 }
1352 1352
1353 header_len = tcp_established_options(sk, NULL, &opts, &md5) + 1353 header_len = tcp_established_options(sk, NULL, &opts, &md5) +
1354 sizeof(struct tcphdr); 1354 sizeof(struct tcphdr);
1355 /* The mss_cache is sized based on tp->tcp_header_len, which assumes 1355 /* The mss_cache is sized based on tp->tcp_header_len, which assumes
1356 * some common options. If this is an odd packet (because we have SACK 1356 * some common options. If this is an odd packet (because we have SACK
1357 * blocks etc) then our calculated header_len will be different, and 1357 * blocks etc) then our calculated header_len will be different, and
1358 * we have to adjust mss_now correspondingly */ 1358 * we have to adjust mss_now correspondingly */
1359 if (header_len != tp->tcp_header_len) { 1359 if (header_len != tp->tcp_header_len) {
1360 int delta = (int) header_len - tp->tcp_header_len; 1360 int delta = (int) header_len - tp->tcp_header_len;
1361 mss_now -= delta; 1361 mss_now -= delta;
1362 } 1362 }
1363 1363
1364 return mss_now; 1364 return mss_now;
1365 } 1365 }
1366 1366
1367 /* Congestion window validation. (RFC2861) */ 1367 /* Congestion window validation. (RFC2861) */
1368 static void tcp_cwnd_validate(struct sock *sk) 1368 static void tcp_cwnd_validate(struct sock *sk)
1369 { 1369 {
1370 struct tcp_sock *tp = tcp_sk(sk); 1370 struct tcp_sock *tp = tcp_sk(sk);
1371 1371
1372 if (tp->packets_out >= tp->snd_cwnd) { 1372 if (tp->packets_out >= tp->snd_cwnd) {
1373 /* Network is fed fully. */ 1373 /* Network is fed fully. */
1374 tp->snd_cwnd_used = 0; 1374 tp->snd_cwnd_used = 0;
1375 tp->snd_cwnd_stamp = tcp_time_stamp; 1375 tp->snd_cwnd_stamp = tcp_time_stamp;
1376 } else { 1376 } else {
1377 /* Network starves. */ 1377 /* Network starves. */
1378 if (tp->packets_out > tp->snd_cwnd_used) 1378 if (tp->packets_out > tp->snd_cwnd_used)
1379 tp->snd_cwnd_used = tp->packets_out; 1379 tp->snd_cwnd_used = tp->packets_out;
1380 1380
1381 if (sysctl_tcp_slow_start_after_idle && 1381 if (sysctl_tcp_slow_start_after_idle &&
1382 (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto) 1382 (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
1383 tcp_cwnd_application_limited(sk); 1383 tcp_cwnd_application_limited(sk);
1384 } 1384 }
1385 } 1385 }
1386 1386
1387 /* Returns the portion of skb which can be sent right away without 1387 /* Returns the portion of skb which can be sent right away without
1388 * introducing MSS oddities to segment boundaries. In rare cases where 1388 * introducing MSS oddities to segment boundaries. In rare cases where
1389 * mss_now != mss_cache, we will request the caller to create a small skb 1389 * mss_now != mss_cache, we will request the caller to create a small skb
1390 * per input skb which could be mostly avoided here (if desired). 1390 * per input skb which could be mostly avoided here (if desired).
1391 * 1391 *
1392 * We explicitly want to create a request for splitting write queue tail 1392 * We explicitly want to create a request for splitting write queue tail
1393 * to a small skb for Nagle purposes while avoiding unnecessary modulos, 1393 * to a small skb for Nagle purposes while avoiding unnecessary modulos,
1394 * thus all the complexity (cwnd_len is always MSS multiple which we 1394 * thus all the complexity (cwnd_len is always MSS multiple which we
1395 * return whenever allowed by the other factors). Basically we need the 1395 * return whenever allowed by the other factors). Basically we need the
1396 * modulo only when the receiver window alone is the limiting factor or 1396 * modulo only when the receiver window alone is the limiting factor or
1397 * when we would be allowed to send the split-due-to-Nagle skb fully. 1397 * when we would be allowed to send the split-due-to-Nagle skb fully.
1398 */ 1398 */
1399 static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, 1399 static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
1400 unsigned int mss_now, unsigned int max_segs) 1400 unsigned int mss_now, unsigned int max_segs)
1401 { 1401 {
1402 const struct tcp_sock *tp = tcp_sk(sk); 1402 const struct tcp_sock *tp = tcp_sk(sk);
1403 u32 needed, window, max_len; 1403 u32 needed, window, max_len;
1404 1404
1405 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; 1405 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1406 max_len = mss_now * max_segs; 1406 max_len = mss_now * max_segs;
1407 1407
1408 if (likely(max_len <= window && skb != tcp_write_queue_tail(sk))) 1408 if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
1409 return max_len; 1409 return max_len;
1410 1410
1411 needed = min(skb->len, window); 1411 needed = min(skb->len, window);
1412 1412
1413 if (max_len <= needed) 1413 if (max_len <= needed)
1414 return max_len; 1414 return max_len;
1415 1415
1416 return needed - needed % mss_now; 1416 return needed - needed % mss_now;
1417 } 1417 }
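A worked example of the split point: with an assumed mss_now of 1460, max_segs of 4, a 5000-byte send window and a 4000-byte tail skb, the cap of 5840 bytes exceeds what is needed, so the amount sent now is trimmed down to the nearest MSS multiple:

#include <stdio.h>

int main(void)
{
        unsigned int mss_now = 1460, max_segs = 4;      /* assumed values */
        unsigned int window = 5000, skb_len = 4000;
        unsigned int max_len = mss_now * max_segs;                      /* 5840 */
        unsigned int needed = skb_len < window ? skb_len : window;      /* 4000 */
        unsigned int split = max_len <= needed ? max_len
                                               : needed - needed % mss_now;

        printf("send %u of %u bytes now\n", split, skb_len);    /* 2920 of 4000 */
        return 0;
}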
1418 1418
1419 /* Can at least one segment of SKB be sent right now, according to the 1419 /* Can at least one segment of SKB be sent right now, according to the
1420 * congestion window rules? If so, return how many segments are allowed. 1420 * congestion window rules? If so, return how many segments are allowed.
1421 */ 1421 */
1422 static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp, 1422 static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
1423 const struct sk_buff *skb) 1423 const struct sk_buff *skb)
1424 { 1424 {
1425 u32 in_flight, cwnd; 1425 u32 in_flight, cwnd;
1426 1426
1427 /* Don't be strict about the congestion window for the final FIN. */ 1427 /* Don't be strict about the congestion window for the final FIN. */
1428 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) && 1428 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
1429 tcp_skb_pcount(skb) == 1) 1429 tcp_skb_pcount(skb) == 1)
1430 return 1; 1430 return 1;
1431 1431
1432 in_flight = tcp_packets_in_flight(tp); 1432 in_flight = tcp_packets_in_flight(tp);
1433 cwnd = tp->snd_cwnd; 1433 cwnd = tp->snd_cwnd;
1434 if (in_flight < cwnd) 1434 if (in_flight < cwnd)
1435 return (cwnd - in_flight); 1435 return (cwnd - in_flight);
1436 1436
1437 return 0; 1437 return 0;
1438 } 1438 }
1439 1439
1440 /* Initialize TSO state of a skb. 1440 /* Initialize TSO state of a skb.
1441 * This must be invoked the first time we consider transmitting 1441 * This must be invoked the first time we consider transmitting
1442 * SKB onto the wire. 1442 * SKB onto the wire.
1443 */ 1443 */
1444 static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb, 1444 static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
1445 unsigned int mss_now) 1445 unsigned int mss_now)
1446 { 1446 {
1447 int tso_segs = tcp_skb_pcount(skb); 1447 int tso_segs = tcp_skb_pcount(skb);
1448 1448
1449 if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) { 1449 if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
1450 tcp_set_skb_tso_segs(sk, skb, mss_now); 1450 tcp_set_skb_tso_segs(sk, skb, mss_now);
1451 tso_segs = tcp_skb_pcount(skb); 1451 tso_segs = tcp_skb_pcount(skb);
1452 } 1452 }
1453 return tso_segs; 1453 return tso_segs;
1454 } 1454 }
1455 1455
1456 /* Minshall's variant of the Nagle send check. */ 1456 /* Minshall's variant of the Nagle send check. */
1457 static inline bool tcp_minshall_check(const struct tcp_sock *tp) 1457 static inline bool tcp_minshall_check(const struct tcp_sock *tp)
1458 { 1458 {
1459 return after(tp->snd_sml, tp->snd_una) && 1459 return after(tp->snd_sml, tp->snd_una) &&
1460 !after(tp->snd_sml, tp->snd_nxt); 1460 !after(tp->snd_sml, tp->snd_nxt);
1461 } 1461 }
1462 1462
1463 /* Return false if the packet can be sent now without violating Nagle's rules: 1463 /* Return false if the packet can be sent now without violating Nagle's rules:
1464 * 1. It is full sized. 1464 * 1. It is full sized.
1465 * 2. Or it contains FIN. (already checked by caller) 1465 * 2. Or it contains FIN. (already checked by caller)
1466 * 3. Or TCP_CORK is not set, and TCP_NODELAY is set. 1466 * 3. Or TCP_CORK is not set, and TCP_NODELAY is set.
1467 * 4. Or TCP_CORK is not set, and all sent packets are ACKed. 1467 * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
1468 * With Minshall's modification: all sent small packets are ACKed. 1468 * With Minshall's modification: all sent small packets are ACKed.
1469 */ 1469 */
1470 static inline bool tcp_nagle_check(const struct tcp_sock *tp, 1470 static inline bool tcp_nagle_check(const struct tcp_sock *tp,
1471 const struct sk_buff *skb, 1471 const struct sk_buff *skb,
1472 unsigned int mss_now, int nonagle) 1472 unsigned int mss_now, int nonagle)
1473 { 1473 {
1474 return skb->len < mss_now && 1474 return skb->len < mss_now &&
1475 ((nonagle & TCP_NAGLE_CORK) || 1475 ((nonagle & TCP_NAGLE_CORK) ||
1476 (!nonagle && tp->packets_out && tcp_minshall_check(tp))); 1476 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1477 } 1477 }
1478 1478
1479 /* Return true if the Nagle test allows this packet to be 1479 /* Return true if the Nagle test allows this packet to be
1480 * sent now. 1480 * sent now.
1481 */ 1481 */
1482 static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb, 1482 static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
1483 unsigned int cur_mss, int nonagle) 1483 unsigned int cur_mss, int nonagle)
1484 { 1484 {
1485 /* The Nagle rule does not apply to frames which sit in the middle of the 1485 /* The Nagle rule does not apply to frames which sit in the middle of the
1486 * write_queue (they have no chance to get new data). 1486 * write_queue (they have no chance to get new data).
1487 * 1487 *
1488 * This is implemented in the callers, where they modify the 'nonagle' 1488 * This is implemented in the callers, where they modify the 'nonagle'
1489 * argument based upon the location of SKB in the send queue. 1489 * argument based upon the location of SKB in the send queue.
1490 */ 1490 */
1491 if (nonagle & TCP_NAGLE_PUSH) 1491 if (nonagle & TCP_NAGLE_PUSH)
1492 return true; 1492 return true;
1493 1493
1494 /* Don't use the nagle rule for urgent data (or for the final FIN). */ 1494 /* Don't use the nagle rule for urgent data (or for the final FIN). */
1495 if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) 1495 if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1496 return true; 1496 return true;
1497 1497
1498 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) 1498 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
1499 return true; 1499 return true;
1500 1500
1501 return false; 1501 return false;
1502 } 1502 }
1503 1503
1504 /* Does at least the first segment of SKB fit into the send window? */ 1504 /* Does at least the first segment of SKB fit into the send window? */
1505 static bool tcp_snd_wnd_test(const struct tcp_sock *tp, 1505 static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
1506 const struct sk_buff *skb, 1506 const struct sk_buff *skb,
1507 unsigned int cur_mss) 1507 unsigned int cur_mss)
1508 { 1508 {
1509 u32 end_seq = TCP_SKB_CB(skb)->end_seq; 1509 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1510 1510
1511 if (skb->len > cur_mss) 1511 if (skb->len > cur_mss)
1512 end_seq = TCP_SKB_CB(skb)->seq + cur_mss; 1512 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
1513 1513
1514 return !after(end_seq, tcp_wnd_end(tp)); 1514 return !after(end_seq, tcp_wnd_end(tp));
1515 } 1515 }
1516 1516
1517 /* This checks if the data bearing packet SKB (usually tcp_send_head(sk)) 1517 /* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
1518 * should be put on the wire right now. If so, it returns the number of 1518 * should be put on the wire right now. If so, it returns the number of
1519 * packets allowed by the congestion window. 1519 * packets allowed by the congestion window.
1520 */ 1520 */
1521 static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb, 1521 static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
1522 unsigned int cur_mss, int nonagle) 1522 unsigned int cur_mss, int nonagle)
1523 { 1523 {
1524 const struct tcp_sock *tp = tcp_sk(sk); 1524 const struct tcp_sock *tp = tcp_sk(sk);
1525 unsigned int cwnd_quota; 1525 unsigned int cwnd_quota;
1526 1526
1527 tcp_init_tso_segs(sk, skb, cur_mss); 1527 tcp_init_tso_segs(sk, skb, cur_mss);
1528 1528
1529 if (!tcp_nagle_test(tp, skb, cur_mss, nonagle)) 1529 if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
1530 return 0; 1530 return 0;
1531 1531
1532 cwnd_quota = tcp_cwnd_test(tp, skb); 1532 cwnd_quota = tcp_cwnd_test(tp, skb);
1533 if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss)) 1533 if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
1534 cwnd_quota = 0; 1534 cwnd_quota = 0;
1535 1535
1536 return cwnd_quota; 1536 return cwnd_quota;
1537 } 1537 }
1538 1538
1539 /* Test if sending is allowed right now. */ 1539 /* Test if sending is allowed right now. */
1540 bool tcp_may_send_now(struct sock *sk) 1540 bool tcp_may_send_now(struct sock *sk)
1541 { 1541 {
1542 const struct tcp_sock *tp = tcp_sk(sk); 1542 const struct tcp_sock *tp = tcp_sk(sk);
1543 struct sk_buff *skb = tcp_send_head(sk); 1543 struct sk_buff *skb = tcp_send_head(sk);
1544 1544
1545 return skb && 1545 return skb &&
1546 tcp_snd_test(sk, skb, tcp_current_mss(sk), 1546 tcp_snd_test(sk, skb, tcp_current_mss(sk),
1547 (tcp_skb_is_last(sk, skb) ? 1547 (tcp_skb_is_last(sk, skb) ?
1548 tp->nonagle : TCP_NAGLE_PUSH)); 1548 tp->nonagle : TCP_NAGLE_PUSH));
1549 } 1549 }
1550 1550
1551 /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet 1551 /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
1552 * which is put after SKB on the list. It is very much like 1552 * which is put after SKB on the list. It is very much like
1553 * tcp_fragment() except that it may make several kinds of assumptions 1553 * tcp_fragment() except that it may make several kinds of assumptions
1554 * in order to speed up the splitting operation. In particular, we 1554 * in order to speed up the splitting operation. In particular, we
1555 * know that all the data is in scatter-gather pages, and that the 1555 * know that all the data is in scatter-gather pages, and that the
1556 * packet has never been sent out before (and thus is not cloned). 1556 * packet has never been sent out before (and thus is not cloned).
1557 */ 1557 */
1558 static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, 1558 static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1559 unsigned int mss_now, gfp_t gfp) 1559 unsigned int mss_now, gfp_t gfp)
1560 { 1560 {
1561 struct sk_buff *buff; 1561 struct sk_buff *buff;
1562 int nlen = skb->len - len; 1562 int nlen = skb->len - len;
1563 u8 flags; 1563 u8 flags;
1564 1564
1565 /* All of a TSO frame must be composed of paged data. */ 1565 /* All of a TSO frame must be composed of paged data. */
1566 if (skb->len != skb->data_len) 1566 if (skb->len != skb->data_len)
1567 return tcp_fragment(sk, skb, len, mss_now); 1567 return tcp_fragment(sk, skb, len, mss_now);
1568 1568
1569 buff = sk_stream_alloc_skb(sk, 0, gfp); 1569 buff = sk_stream_alloc_skb(sk, 0, gfp);
1570 if (unlikely(buff == NULL)) 1570 if (unlikely(buff == NULL))
1571 return -ENOMEM; 1571 return -ENOMEM;
1572 1572
1573 sk->sk_wmem_queued += buff->truesize; 1573 sk->sk_wmem_queued += buff->truesize;
1574 sk_mem_charge(sk, buff->truesize); 1574 sk_mem_charge(sk, buff->truesize);
1575 buff->truesize += nlen; 1575 buff->truesize += nlen;
1576 skb->truesize -= nlen; 1576 skb->truesize -= nlen;
1577 1577
1578 /* Correct the sequence numbers. */ 1578 /* Correct the sequence numbers. */
1579 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len; 1579 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1580 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; 1580 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1581 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; 1581 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1582 1582
1583 /* PSH and FIN should only be set in the second packet. */ 1583 /* PSH and FIN should only be set in the second packet. */
1584 flags = TCP_SKB_CB(skb)->tcp_flags; 1584 flags = TCP_SKB_CB(skb)->tcp_flags;
1585 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); 1585 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1586 TCP_SKB_CB(buff)->tcp_flags = flags; 1586 TCP_SKB_CB(buff)->tcp_flags = flags;
1587 1587
1588 /* This packet was never sent out yet, so no SACK bits. */ 1588 /* This packet was never sent out yet, so no SACK bits. */
1589 TCP_SKB_CB(buff)->sacked = 0; 1589 TCP_SKB_CB(buff)->sacked = 0;
1590 1590
1591 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL; 1591 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
1592 skb_split(skb, buff, len); 1592 skb_split(skb, buff, len);
1593 1593
1594 /* Fix up tso_factor for both original and new SKB. */ 1594 /* Fix up tso_factor for both original and new SKB. */
1595 tcp_set_skb_tso_segs(sk, skb, mss_now); 1595 tcp_set_skb_tso_segs(sk, skb, mss_now);
1596 tcp_set_skb_tso_segs(sk, buff, mss_now); 1596 tcp_set_skb_tso_segs(sk, buff, mss_now);
1597 1597
1598 /* Link BUFF into the send queue. */ 1598 /* Link BUFF into the send queue. */
1599 skb_header_release(buff); 1599 skb_header_release(buff);
1600 tcp_insert_write_queue_after(skb, buff, sk); 1600 tcp_insert_write_queue_after(skb, buff, sk);
1601 1601
1602 return 0; 1602 return 0;
1603 } 1603 }
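The sequence bookkeeping done by tso_fragment() above is easy to check by hand: the new buffer takes over the tail [seq + len, end_seq) while the original skb keeps [seq, seq + len). Here is a small user-space model; the struct is a hypothetical stand-in for TCP_SKB_CB(), not the kernel structure.

#include <stdint.h>
#include <stdio.h>

struct seg { uint32_t seq, end_seq; };

static void split_seg(struct seg *orig, struct seg *tail, uint32_t len)
{
	tail->seq     = orig->seq + len;	/* tail starts after the first len bytes */
	tail->end_seq = orig->end_seq;		/* tail inherits the original end */
	orig->end_seq = tail->seq;		/* original keeps only [seq, seq + len) */
}

int main(void)
{
	struct seg skb = { .seq = 1000, .end_seq = 1000 + 3 * 1460 };
	struct seg buff;

	split_seg(&skb, &buff, 1460);
	printf("skb  [%u, %u)\nbuff [%u, %u)\n",
	       skb.seq, skb.end_seq, buff.seq, buff.end_seq);
	return 0;
}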
1604 1604
1605 /* Try to defer sending, if possible, in order to minimize the amount 1605 /* Try to defer sending, if possible, in order to minimize the amount
1606 * of TSO splitting we do. View it as a kind of TSO Nagle test. 1606 * of TSO splitting we do. View it as a kind of TSO Nagle test.
1607 * 1607 *
1608 * This algorithm is from John Heffner. 1608 * This algorithm is from John Heffner.
1609 */ 1609 */
1610 static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) 1610 static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1611 { 1611 {
1612 struct tcp_sock *tp = tcp_sk(sk); 1612 struct tcp_sock *tp = tcp_sk(sk);
1613 const struct inet_connection_sock *icsk = inet_csk(sk); 1613 const struct inet_connection_sock *icsk = inet_csk(sk);
1614 u32 send_win, cong_win, limit, in_flight; 1614 u32 send_win, cong_win, limit, in_flight;
1615 int win_divisor; 1615 int win_divisor;
1616 1616
1617 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) 1617 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1618 goto send_now; 1618 goto send_now;
1619 1619
1620 if (icsk->icsk_ca_state != TCP_CA_Open) 1620 if (icsk->icsk_ca_state != TCP_CA_Open)
1621 goto send_now; 1621 goto send_now;
1622 1622
1623 /* Defer for less than two clock ticks. */ 1623 /* Defer for less than two clock ticks. */
1624 if (tp->tso_deferred && 1624 if (tp->tso_deferred &&
1625 (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1) 1625 (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
1626 goto send_now; 1626 goto send_now;
1627 1627
1628 in_flight = tcp_packets_in_flight(tp); 1628 in_flight = tcp_packets_in_flight(tp);
1629 1629
1630 BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight)); 1630 BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));
1631 1631
1632 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; 1632 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1633 1633
1634 /* From in_flight test above, we know that cwnd > in_flight. */ 1634 /* From in_flight test above, we know that cwnd > in_flight. */
1635 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache; 1635 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
1636 1636
1637 limit = min(send_win, cong_win); 1637 limit = min(send_win, cong_win);
1638 1638
1639 /* If a full-sized TSO skb can be sent, do it. */ 1639 /* If a full-sized TSO skb can be sent, do it. */
1640 if (limit >= min_t(unsigned int, sk->sk_gso_max_size, 1640 if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
1641 tp->xmit_size_goal_segs * tp->mss_cache)) 1641 tp->xmit_size_goal_segs * tp->mss_cache))
1642 goto send_now; 1642 goto send_now;
1643 1643
1644 /* Middle in queue won't get any more data, full sendable already? */ 1644 /* Middle in queue won't get any more data, full sendable already? */
1645 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len)) 1645 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1646 goto send_now; 1646 goto send_now;
1647 1647
1648 win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor); 1648 win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
1649 if (win_divisor) { 1649 if (win_divisor) {
1650 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); 1650 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1651 1651
1652 /* If at least some fraction of a window is available, 1652 /* If at least some fraction of a window is available,
1653 * just use it. 1653 * just use it.
1654 */ 1654 */
1655 chunk /= win_divisor; 1655 chunk /= win_divisor;
1656 if (limit >= chunk) 1656 if (limit >= chunk)
1657 goto send_now; 1657 goto send_now;
1658 } else { 1658 } else {
1659 /* Different approach, try not to defer past a single 1659 /* Different approach, try not to defer past a single
1660 * ACK. Receiver should ACK every other full sized 1660 * ACK. Receiver should ACK every other full sized
1661 * frame, so if we have space for more than 3 frames 1661 * frame, so if we have space for more than 3 frames
1662 * then send now. 1662 * then send now.
1663 */ 1663 */
1664 if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache) 1664 if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache)
1665 goto send_now; 1665 goto send_now;
1666 } 1666 }
1667 1667
1668 /* Ok, it looks like it is advisable to defer. 1668 /* Ok, it looks like it is advisable to defer.
1669 * Do not rearm the timer if already set to not break TCP ACK clocking. 1669 * Do not rearm the timer if already set to not break TCP ACK clocking.
1670 */ 1670 */
1671 if (!tp->tso_deferred) 1671 if (!tp->tso_deferred)
1672 tp->tso_deferred = 1 | (jiffies << 1); 1672 tp->tso_deferred = 1 | (jiffies << 1);
1673 1673
1674 return true; 1674 return true;
1675 1675
1676 send_now: 1676 send_now:
1677 tp->tso_deferred = 0; 1677 tp->tso_deferred = 0;
1678 return false; 1678 return false;
1679 } 1679 }
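The defer-or-send arithmetic above can be illustrated with concrete numbers. The sketch below is a standalone user-space model of only the two final checks (it omits the earlier full-TSO-burst and queue-tail shortcuts); the divisor value of 3 and the 3-frame threshold follow the comments above and are assumptions of the sketch.

#include <stdint.h>
#include <stdio.h>

static int should_send_now(uint32_t send_win, uint32_t cong_win, uint32_t snd_wnd,
			   uint32_t cwnd, uint32_t mss, int win_divisor)
{
	uint32_t limit = send_win < cong_win ? send_win : cong_win;

	if (win_divisor) {
		/* Send once a fraction (1/win_divisor) of the window is usable. */
		uint32_t wnd = snd_wnd < cwnd * mss ? snd_wnd : cwnd * mss;

		return limit >= wnd / win_divisor;
	}
	/* Otherwise send as soon as there is room for more than ~3 full frames. */
	return limit > 3 * mss;
}

int main(void)
{
	/* cwnd 20, 10 packets in flight, 8 KB usable send window, MSS 1460 */
	uint32_t mss = 1460, cong_win = (20 - 10) * mss, send_win = 8192;

	printf("win_divisor=3: %s\n",
	       should_send_now(send_win, cong_win, 65535, 20, mss, 3) ? "send" : "defer");
	printf("win_divisor=0: %s\n",
	       should_send_now(send_win, cong_win, 65535, 20, mss, 0) ? "send" : "defer");
	return 0;
}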
1680 1680
1681 /* Create a new MTU probe if we are ready. 1681 /* Create a new MTU probe if we are ready.
1682 * MTU probing regularly attempts to increase the path MTU by 1682 * MTU probing regularly attempts to increase the path MTU by
1683 * deliberately sending larger packets. This discovers routing 1683 * deliberately sending larger packets. This discovers routing
1684 * changes resulting in larger path MTUs. 1684 * changes resulting in larger path MTUs.
1685 * 1685 *
1686 * Returns 0 if we should wait to probe (no cwnd available), 1686 * Returns 0 if we should wait to probe (no cwnd available),
1687 * 1 if a probe was sent, 1687 * 1 if a probe was sent,
1688 * -1 otherwise 1688 * -1 otherwise
1689 */ 1689 */
1690 static int tcp_mtu_probe(struct sock *sk) 1690 static int tcp_mtu_probe(struct sock *sk)
1691 { 1691 {
1692 struct tcp_sock *tp = tcp_sk(sk); 1692 struct tcp_sock *tp = tcp_sk(sk);
1693 struct inet_connection_sock *icsk = inet_csk(sk); 1693 struct inet_connection_sock *icsk = inet_csk(sk);
1694 struct sk_buff *skb, *nskb, *next; 1694 struct sk_buff *skb, *nskb, *next;
1695 int len; 1695 int len;
1696 int probe_size; 1696 int probe_size;
1697 int size_needed; 1697 int size_needed;
1698 int copy; 1698 int copy;
1699 int mss_now; 1699 int mss_now;
1700 1700
1701 /* Not currently probing/verifying, 1701 /* Not currently probing/verifying,
1702 * not in recovery, 1702 * not in recovery,
1703 * have enough cwnd, and 1703 * have enough cwnd, and
1704 * not SACKing (the variable headers throw things off) */ 1704 * not SACKing (the variable headers throw things off) */
1705 if (!icsk->icsk_mtup.enabled || 1705 if (!icsk->icsk_mtup.enabled ||
1706 icsk->icsk_mtup.probe_size || 1706 icsk->icsk_mtup.probe_size ||
1707 inet_csk(sk)->icsk_ca_state != TCP_CA_Open || 1707 inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
1708 tp->snd_cwnd < 11 || 1708 tp->snd_cwnd < 11 ||
1709 tp->rx_opt.num_sacks || tp->rx_opt.dsack) 1709 tp->rx_opt.num_sacks || tp->rx_opt.dsack)
1710 return -1; 1710 return -1;
1711 1711
1712 /* Very simple search strategy: just double the MSS. */ 1712 /* Very simple search strategy: just double the MSS. */
1713 mss_now = tcp_current_mss(sk); 1713 mss_now = tcp_current_mss(sk);
1714 probe_size = 2 * tp->mss_cache; 1714 probe_size = 2 * tp->mss_cache;
1715 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache; 1715 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
1716 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) { 1716 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
1717 /* TODO: set timer for probe_converge_event */ 1717 /* TODO: set timer for probe_converge_event */
1718 return -1; 1718 return -1;
1719 } 1719 }
1720 1720
1721 /* Have enough data in the send queue to probe? */ 1721 /* Have enough data in the send queue to probe? */
1722 if (tp->write_seq - tp->snd_nxt < size_needed) 1722 if (tp->write_seq - tp->snd_nxt < size_needed)
1723 return -1; 1723 return -1;
1724 1724
1725 if (tp->snd_wnd < size_needed) 1725 if (tp->snd_wnd < size_needed)
1726 return -1; 1726 return -1;
1727 if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp))) 1727 if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
1728 return 0; 1728 return 0;
1729 1729
1730 /* Do we need to wait to drain cwnd? With none in flight, don't stall */ 1730 /* Do we need to wait to drain cwnd? With none in flight, don't stall */
1731 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) { 1731 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
1732 if (!tcp_packets_in_flight(tp)) 1732 if (!tcp_packets_in_flight(tp))
1733 return -1; 1733 return -1;
1734 else 1734 else
1735 return 0; 1735 return 0;
1736 } 1736 }
1737 1737
1738 /* We're allowed to probe. Build it now. */ 1738 /* We're allowed to probe. Build it now. */
1739 if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL) 1739 if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
1740 return -1; 1740 return -1;
1741 sk->sk_wmem_queued += nskb->truesize; 1741 sk->sk_wmem_queued += nskb->truesize;
1742 sk_mem_charge(sk, nskb->truesize); 1742 sk_mem_charge(sk, nskb->truesize);
1743 1743
1744 skb = tcp_send_head(sk); 1744 skb = tcp_send_head(sk);
1745 1745
1746 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; 1746 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1747 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; 1747 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
1748 TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK; 1748 TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
1749 TCP_SKB_CB(nskb)->sacked = 0; 1749 TCP_SKB_CB(nskb)->sacked = 0;
1750 nskb->csum = 0; 1750 nskb->csum = 0;
1751 nskb->ip_summed = skb->ip_summed; 1751 nskb->ip_summed = skb->ip_summed;
1752 1752
1753 tcp_insert_write_queue_before(nskb, skb, sk); 1753 tcp_insert_write_queue_before(nskb, skb, sk);
1754 1754
1755 len = 0; 1755 len = 0;
1756 tcp_for_write_queue_from_safe(skb, next, sk) { 1756 tcp_for_write_queue_from_safe(skb, next, sk) {
1757 copy = min_t(int, skb->len, probe_size - len); 1757 copy = min_t(int, skb->len, probe_size - len);
1758 if (nskb->ip_summed) 1758 if (nskb->ip_summed)
1759 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy); 1759 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
1760 else 1760 else
1761 nskb->csum = skb_copy_and_csum_bits(skb, 0, 1761 nskb->csum = skb_copy_and_csum_bits(skb, 0,
1762 skb_put(nskb, copy), 1762 skb_put(nskb, copy),
1763 copy, nskb->csum); 1763 copy, nskb->csum);
1764 1764
1765 if (skb->len <= copy) { 1765 if (skb->len <= copy) {
1766 /* We've eaten all the data from this skb. 1766 /* We've eaten all the data from this skb.
1767 * Throw it away. */ 1767 * Throw it away. */
1768 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; 1768 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1769 tcp_unlink_write_queue(skb, sk); 1769 tcp_unlink_write_queue(skb, sk);
1770 sk_wmem_free_skb(sk, skb); 1770 sk_wmem_free_skb(sk, skb);
1771 } else { 1771 } else {
1772 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags & 1772 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
1773 ~(TCPHDR_FIN|TCPHDR_PSH); 1773 ~(TCPHDR_FIN|TCPHDR_PSH);
1774 if (!skb_shinfo(skb)->nr_frags) { 1774 if (!skb_shinfo(skb)->nr_frags) {
1775 skb_pull(skb, copy); 1775 skb_pull(skb, copy);
1776 if (skb->ip_summed != CHECKSUM_PARTIAL) 1776 if (skb->ip_summed != CHECKSUM_PARTIAL)
1777 skb->csum = csum_partial(skb->data, 1777 skb->csum = csum_partial(skb->data,
1778 skb->len, 0); 1778 skb->len, 0);
1779 } else { 1779 } else {
1780 __pskb_trim_head(skb, copy); 1780 __pskb_trim_head(skb, copy);
1781 tcp_set_skb_tso_segs(sk, skb, mss_now); 1781 tcp_set_skb_tso_segs(sk, skb, mss_now);
1782 } 1782 }
1783 TCP_SKB_CB(skb)->seq += copy; 1783 TCP_SKB_CB(skb)->seq += copy;
1784 } 1784 }
1785 1785
1786 len += copy; 1786 len += copy;
1787 1787
1788 if (len >= probe_size) 1788 if (len >= probe_size)
1789 break; 1789 break;
1790 } 1790 }
1791 tcp_init_tso_segs(sk, nskb, nskb->len); 1791 tcp_init_tso_segs(sk, nskb, nskb->len);
1792 1792
1793 /* We're ready to send. If this fails, the probe will 1793 /* We're ready to send. If this fails, the probe will
1794 * be resegmented into mss-sized pieces by tcp_write_xmit(). */ 1794 * be resegmented into mss-sized pieces by tcp_write_xmit(). */
1795 TCP_SKB_CB(nskb)->when = tcp_time_stamp; 1795 TCP_SKB_CB(nskb)->when = tcp_time_stamp;
1796 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) { 1796 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
1797 /* Decrement cwnd here because we are sending 1797 /* Decrement cwnd here because we are sending
1798 * effectively two packets. */ 1798 * effectively two packets. */
1799 tp->snd_cwnd--; 1799 tp->snd_cwnd--;
1800 tcp_event_new_data_sent(sk, nskb); 1800 tcp_event_new_data_sent(sk, nskb);
1801 1801
1802 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len); 1802 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
1803 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq; 1803 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
1804 tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq; 1804 tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq;
1805 1805
1806 return 1; 1806 return 1;
1807 } 1807 }
1808 1808
1809 return -1; 1809 return -1;
1810 } 1810 }
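The probe sizing in tcp_mtu_probe() above is simple arithmetic: the probe carries twice the cached MSS, and the send queue and window must additionally cover (reordering + 1) more MSS-sized segments before a probe is attempted. A tiny sketch with illustrative values:

#include <stdio.h>

int main(void)
{
	int mss_cache = 1400, reordering = 3;				/* illustrative values */
	int probe_size  = 2 * mss_cache;				/* 2800 bytes on the wire */
	int size_needed = probe_size + (reordering + 1) * mss_cache;	/* 8400 bytes */

	printf("probe_size=%d size_needed=%d\n", probe_size, size_needed);
	return 0;
}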
1811 1811
1812 /* This routine writes packets to the network. It advances the 1812 /* This routine writes packets to the network. It advances the
1813 * send_head. This happens as incoming acks open up the remote 1813 * send_head. This happens as incoming acks open up the remote
1814 * window for us. 1814 * window for us.
1815 * 1815 *
1816 * LARGESEND note: !tcp_urg_mode is overkill, only frames between 1816 * LARGESEND note: !tcp_urg_mode is overkill, only frames between
1817 * snd_up-64k-mss .. snd_up cannot be large. However, taking into 1817 * snd_up-64k-mss .. snd_up cannot be large. However, taking into
1818 * account rare use of URG, this is not a big flaw. 1818 * account rare use of URG, this is not a big flaw.
1819 * 1819 *
1820 * Send at most one packet when push_one > 0. Temporarily ignore 1820 * Send at most one packet when push_one > 0. Temporarily ignore
1821 * cwnd limit to force at most one packet out when push_one == 2. 1821 * cwnd limit to force at most one packet out when push_one == 2.
1822 * 1822 *
1823 * Returns true if no segments are in flight and we have queued segments, 1823 * Returns true if no segments are in flight and we have queued segments,
1824 * but cannot send anything now because of SWS or another problem. 1824 * but cannot send anything now because of SWS or another problem.
1825 */ 1825 */
1826 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, 1826 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1827 int push_one, gfp_t gfp) 1827 int push_one, gfp_t gfp)
1828 { 1828 {
1829 struct tcp_sock *tp = tcp_sk(sk); 1829 struct tcp_sock *tp = tcp_sk(sk);
1830 struct sk_buff *skb; 1830 struct sk_buff *skb;
1831 unsigned int tso_segs, sent_pkts; 1831 unsigned int tso_segs, sent_pkts;
1832 int cwnd_quota; 1832 int cwnd_quota;
1833 int result; 1833 int result;
1834 1834
1835 sent_pkts = 0; 1835 sent_pkts = 0;
1836 1836
1837 if (!push_one) { 1837 if (!push_one) {
1838 /* Do MTU probing. */ 1838 /* Do MTU probing. */
1839 result = tcp_mtu_probe(sk); 1839 result = tcp_mtu_probe(sk);
1840 if (!result) { 1840 if (!result) {
1841 return false; 1841 return false;
1842 } else if (result > 0) { 1842 } else if (result > 0) {
1843 sent_pkts = 1; 1843 sent_pkts = 1;
1844 } 1844 }
1845 } 1845 }
1846 1846
1847 while ((skb = tcp_send_head(sk))) { 1847 while ((skb = tcp_send_head(sk))) {
1848 unsigned int limit; 1848 unsigned int limit;
1849 1849
1850 tso_segs = tcp_init_tso_segs(sk, skb, mss_now); 1850 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
1851 BUG_ON(!tso_segs); 1851 BUG_ON(!tso_segs);
1852 1852
1853 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) 1853 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE)
1854 goto repair; /* Skip network transmission */ 1854 goto repair; /* Skip network transmission */
1855 1855
1856 cwnd_quota = tcp_cwnd_test(tp, skb); 1856 cwnd_quota = tcp_cwnd_test(tp, skb);
1857 if (!cwnd_quota) { 1857 if (!cwnd_quota) {
1858 if (push_one == 2) 1858 if (push_one == 2)
1859 /* Force out a loss probe pkt. */ 1859 /* Force out a loss probe pkt. */
1860 cwnd_quota = 1; 1860 cwnd_quota = 1;
1861 else 1861 else
1862 break; 1862 break;
1863 } 1863 }
1864 1864
1865 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) 1865 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
1866 break; 1866 break;
1867 1867
1868 if (tso_segs == 1) { 1868 if (tso_segs == 1) {
1869 if (unlikely(!tcp_nagle_test(tp, skb, mss_now, 1869 if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
1870 (tcp_skb_is_last(sk, skb) ? 1870 (tcp_skb_is_last(sk, skb) ?
1871 nonagle : TCP_NAGLE_PUSH)))) 1871 nonagle : TCP_NAGLE_PUSH))))
1872 break; 1872 break;
1873 } else { 1873 } else {
1874 if (!push_one && tcp_tso_should_defer(sk, skb)) 1874 if (!push_one && tcp_tso_should_defer(sk, skb))
1875 break; 1875 break;
1876 } 1876 }
1877 1877
1878 /* TCP Small Queues : 1878 /* TCP Small Queues :
1879 * Control the number of packets queued in qdisc/devices to two packets or ~1 ms of data. 1879 * Control the number of packets queued in qdisc/devices to two packets or ~1 ms of data.
1880 * This allows for : 1880 * This allows for :
1881 * - better RTT estimation and ACK scheduling 1881 * - better RTT estimation and ACK scheduling
1882 * - faster recovery 1882 * - faster recovery
1883 * - high rates 1883 * - high rates
1884 * Alas, some drivers / subsystems require a fair amount 1884 * Alas, some drivers / subsystems require a fair amount
1885 * of queued bytes to ensure line rate. 1885 * of queued bytes to ensure line rate.
1886 * One example is wifi aggregation (802.11 AMPDU) 1886 * One example is wifi aggregation (802.11 AMPDU)
1887 */ 1887 */
1888 limit = max_t(unsigned int, sysctl_tcp_limit_output_bytes, 1888 limit = max_t(unsigned int, sysctl_tcp_limit_output_bytes,
1889 sk->sk_pacing_rate >> 10); 1889 sk->sk_pacing_rate >> 10);
1890 1890
1891 if (atomic_read(&sk->sk_wmem_alloc) > limit) { 1891 if (atomic_read(&sk->sk_wmem_alloc) > limit) {
1892 set_bit(TSQ_THROTTLED, &tp->tsq_flags); 1892 set_bit(TSQ_THROTTLED, &tp->tsq_flags);
1893 break; 1893 break;
1894 } 1894 }
1895 1895
1896 limit = mss_now; 1896 limit = mss_now;
1897 if (tso_segs > 1 && !tcp_urg_mode(tp)) 1897 if (tso_segs > 1 && !tcp_urg_mode(tp))
1898 limit = tcp_mss_split_point(sk, skb, mss_now, 1898 limit = tcp_mss_split_point(sk, skb, mss_now,
1899 min_t(unsigned int, 1899 min_t(unsigned int,
1900 cwnd_quota, 1900 cwnd_quota,
1901 sk->sk_gso_max_segs)); 1901 sk->sk_gso_max_segs));
1902 1902
1903 if (skb->len > limit && 1903 if (skb->len > limit &&
1904 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) 1904 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
1905 break; 1905 break;
1906 1906
1907 TCP_SKB_CB(skb)->when = tcp_time_stamp; 1907 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1908 1908
1909 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) 1909 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
1910 break; 1910 break;
1911 1911
1912 repair: 1912 repair:
1913 /* Advance the send_head. This one is sent out. 1913 /* Advance the send_head. This one is sent out.
1914 * This call will increment packets_out. 1914 * This call will increment packets_out.
1915 */ 1915 */
1916 tcp_event_new_data_sent(sk, skb); 1916 tcp_event_new_data_sent(sk, skb);
1917 1917
1918 tcp_minshall_update(tp, mss_now, skb); 1918 tcp_minshall_update(tp, mss_now, skb);
1919 sent_pkts += tcp_skb_pcount(skb); 1919 sent_pkts += tcp_skb_pcount(skb);
1920 1920
1921 if (push_one) 1921 if (push_one)
1922 break; 1922 break;
1923 } 1923 }
1924 1924
1925 if (likely(sent_pkts)) { 1925 if (likely(sent_pkts)) {
1926 if (tcp_in_cwnd_reduction(sk)) 1926 if (tcp_in_cwnd_reduction(sk))
1927 tp->prr_out += sent_pkts; 1927 tp->prr_out += sent_pkts;
1928 1928
1929 /* Send one loss probe per tail loss episode. */ 1929 /* Send one loss probe per tail loss episode. */
1930 if (push_one != 2) 1930 if (push_one != 2)
1931 tcp_schedule_loss_probe(sk); 1931 tcp_schedule_loss_probe(sk);
1932 tcp_cwnd_validate(sk); 1932 tcp_cwnd_validate(sk);
1933 return false; 1933 return false;
1934 } 1934 }
1935 return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk)); 1935 return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
1936 } 1936 }
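The TCP Small Queues limit computed inside tcp_write_xmit() above works out to roughly max(tcp_limit_output_bytes, one millisecond of data at the pacing rate), since sk_pacing_rate is in bytes per second and a right shift by 10 divides by 1024. Below is a user-space sketch with assumed rates; the 131072-byte sysctl value is only an illustrative default.

#include <stdint.h>
#include <stdio.h>

static uint64_t tsq_limit(uint64_t pacing_rate_Bps, uint64_t sysctl_limit)
{
	uint64_t ms_worth = pacing_rate_Bps >> 10;	/* /1024 ~= 1 ms of data */

	return ms_worth > sysctl_limit ? ms_worth : sysctl_limit;
}

int main(void)
{
	printf("10 Mbit/s flow: %llu bytes queued below the stack\n",
	       (unsigned long long)tsq_limit(10000000ULL / 8, 131072));
	printf("10 Gbit/s flow: %llu bytes queued below the stack\n",
	       (unsigned long long)tsq_limit(10000000000ULL / 8, 131072));
	return 0;
}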
1937 1937
1938 bool tcp_schedule_loss_probe(struct sock *sk) 1938 bool tcp_schedule_loss_probe(struct sock *sk)
1939 { 1939 {
1940 struct inet_connection_sock *icsk = inet_csk(sk); 1940 struct inet_connection_sock *icsk = inet_csk(sk);
1941 struct tcp_sock *tp = tcp_sk(sk); 1941 struct tcp_sock *tp = tcp_sk(sk);
1942 u32 timeout, tlp_time_stamp, rto_time_stamp; 1942 u32 timeout, tlp_time_stamp, rto_time_stamp;
1943 u32 rtt = tp->srtt >> 3; 1943 u32 rtt = tp->srtt >> 3;
1944 1944
1945 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS)) 1945 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
1946 return false; 1946 return false;
1947 /* No consecutive loss probes. */ 1947 /* No consecutive loss probes. */
1948 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { 1948 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
1949 tcp_rearm_rto(sk); 1949 tcp_rearm_rto(sk);
1950 return false; 1950 return false;
1951 } 1951 }
1952 /* Don't do any loss probe on a Fast Open connection before 3WHS 1952 /* Don't do any loss probe on a Fast Open connection before 3WHS
1953 * finishes. 1953 * finishes.
1954 */ 1954 */
1955 if (sk->sk_state == TCP_SYN_RECV) 1955 if (sk->sk_state == TCP_SYN_RECV)
1956 return false; 1956 return false;
1957 1957
1958 /* TLP is only scheduled when next timer event is RTO. */ 1958 /* TLP is only scheduled when next timer event is RTO. */
1959 if (icsk->icsk_pending != ICSK_TIME_RETRANS) 1959 if (icsk->icsk_pending != ICSK_TIME_RETRANS)
1960 return false; 1960 return false;
1961 1961
1962 /* Schedule a loss probe in 2*RTT for SACK-capable connections 1962 /* Schedule a loss probe in 2*RTT for SACK-capable connections
1963 * in Open state that are limited by either cwnd or the application. 1963 * in Open state that are limited by either cwnd or the application.
1964 */ 1964 */
1965 if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out || 1965 if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out ||
1966 !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) 1966 !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
1967 return false; 1967 return false;
1968 1968
1969 if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) && 1969 if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
1970 tcp_send_head(sk)) 1970 tcp_send_head(sk))
1971 return false; 1971 return false;
1972 1972
1973 /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account 1973 /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
1974 * for delayed ack when there's one outstanding packet. 1974 * for delayed ack when there's one outstanding packet.
1975 */ 1975 */
1976 timeout = rtt << 1; 1976 timeout = rtt << 1;
1977 if (tp->packets_out == 1) 1977 if (tp->packets_out == 1)
1978 timeout = max_t(u32, timeout, 1978 timeout = max_t(u32, timeout,
1979 (rtt + (rtt >> 1) + TCP_DELACK_MAX)); 1979 (rtt + (rtt >> 1) + TCP_DELACK_MAX));
1980 timeout = max_t(u32, timeout, msecs_to_jiffies(10)); 1980 timeout = max_t(u32, timeout, msecs_to_jiffies(10));
1981 1981
1982 /* If RTO is shorter, just schedule TLP in its place. */ 1982 /* If RTO is shorter, just schedule TLP in its place. */
1983 tlp_time_stamp = tcp_time_stamp + timeout; 1983 tlp_time_stamp = tcp_time_stamp + timeout;
1984 rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout; 1984 rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
1985 if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) { 1985 if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
1986 s32 delta = rto_time_stamp - tcp_time_stamp; 1986 s32 delta = rto_time_stamp - tcp_time_stamp;
1987 if (delta > 0) 1987 if (delta > 0)
1988 timeout = delta; 1988 timeout = delta;
1989 } 1989 }
1990 1990
1991 inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, 1991 inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
1992 TCP_RTO_MAX); 1992 TCP_RTO_MAX);
1993 return true; 1993 return true;
1994 } 1994 }
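The timeout chosen by tcp_schedule_loss_probe() above starts at 2*SRTT (tp->srtt stores the smoothed RTT shifted left by 3, hence the >> 3), is raised to 1.5*SRTT plus the delayed-ACK allowance when only one packet is outstanding, is floored at 10 ms, and is never scheduled past the pending RTO. Below is a standalone model in milliseconds; the 200 ms delayed-ACK allowance is an assumption of the sketch.

#include <stdio.h>

static unsigned int tlp_timeout_ms(unsigned int srtt_ms, unsigned int packets_out,
				   unsigned int time_to_rto_ms)
{
	unsigned int timeout = 2 * srtt_ms;

	if (packets_out == 1 && timeout < srtt_ms + srtt_ms / 2 + 200)
		timeout = srtt_ms + srtt_ms / 2 + 200;	/* allow for a delayed ACK */
	if (timeout < 10)
		timeout = 10;				/* lower bound */
	if (timeout > time_to_rto_ms)
		timeout = time_to_rto_ms;		/* never fire after the RTO would */
	return timeout;
}

int main(void)
{
	printf("srtt=40ms, 5 pkts out, RTO in 300ms -> PTO %u ms\n",
	       tlp_timeout_ms(40, 5, 300));
	printf("srtt=40ms, 1 pkt out,  RTO in 300ms -> PTO %u ms\n",
	       tlp_timeout_ms(40, 1, 300));
	return 0;
}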
1995 1995
1996 /* When probe timeout (PTO) fires, send a new segment if one exists, else 1996 /* When probe timeout (PTO) fires, send a new segment if one exists, else
1997 * retransmit the last segment. 1997 * retransmit the last segment.
1998 */ 1998 */
1999 void tcp_send_loss_probe(struct sock *sk) 1999 void tcp_send_loss_probe(struct sock *sk)
2000 { 2000 {
2001 struct tcp_sock *tp = tcp_sk(sk); 2001 struct tcp_sock *tp = tcp_sk(sk);
2002 struct sk_buff *skb; 2002 struct sk_buff *skb;
2003 int pcount; 2003 int pcount;
2004 int mss = tcp_current_mss(sk); 2004 int mss = tcp_current_mss(sk);
2005 int err = -1; 2005 int err = -1;
2006 2006
2007 if (tcp_send_head(sk) != NULL) { 2007 if (tcp_send_head(sk) != NULL) {
2008 err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); 2008 err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
2009 goto rearm_timer; 2009 goto rearm_timer;
2010 } 2010 }
2011 2011
2012 /* At most one outstanding TLP retransmission. */ 2012 /* At most one outstanding TLP retransmission. */
2013 if (tp->tlp_high_seq) 2013 if (tp->tlp_high_seq)
2014 goto rearm_timer; 2014 goto rearm_timer;
2015 2015
2016 /* Retransmit last segment. */ 2016 /* Retransmit last segment. */
2017 skb = tcp_write_queue_tail(sk); 2017 skb = tcp_write_queue_tail(sk);
2018 if (WARN_ON(!skb)) 2018 if (WARN_ON(!skb))
2019 goto rearm_timer; 2019 goto rearm_timer;
2020 2020
2021 pcount = tcp_skb_pcount(skb); 2021 pcount = tcp_skb_pcount(skb);
2022 if (WARN_ON(!pcount)) 2022 if (WARN_ON(!pcount))
2023 goto rearm_timer; 2023 goto rearm_timer;
2024 2024
2025 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { 2025 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
2026 if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss))) 2026 if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss)))
2027 goto rearm_timer; 2027 goto rearm_timer;
2028 skb = tcp_write_queue_tail(sk); 2028 skb = tcp_write_queue_tail(sk);
2029 } 2029 }
2030 2030
2031 if (WARN_ON(!skb || !tcp_skb_pcount(skb))) 2031 if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
2032 goto rearm_timer; 2032 goto rearm_timer;
2033 2033
2034 /* Probe with zero data doesn't trigger fast recovery. */ 2034 /* Probe with zero data doesn't trigger fast recovery. */
2035 if (skb->len > 0) 2035 if (skb->len > 0)
2036 err = __tcp_retransmit_skb(sk, skb); 2036 err = __tcp_retransmit_skb(sk, skb);
2037 2037
2038 /* Record snd_nxt for loss detection. */ 2038 /* Record snd_nxt for loss detection. */
2039 if (likely(!err)) 2039 if (likely(!err))
2040 tp->tlp_high_seq = tp->snd_nxt; 2040 tp->tlp_high_seq = tp->snd_nxt;
2041 2041
2042 rearm_timer: 2042 rearm_timer:
2043 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 2043 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2044 inet_csk(sk)->icsk_rto, 2044 inet_csk(sk)->icsk_rto,
2045 TCP_RTO_MAX); 2045 TCP_RTO_MAX);
2046 2046
2047 if (likely(!err)) 2047 if (likely(!err))
2048 NET_INC_STATS_BH(sock_net(sk), 2048 NET_INC_STATS_BH(sock_net(sk),
2049 LINUX_MIB_TCPLOSSPROBES); 2049 LINUX_MIB_TCPLOSSPROBES);
2050 return; 2050 return;
2051 } 2051 }
2052 2052
2053 /* Push out any pending frames which were held back due to 2053 /* Push out any pending frames which were held back due to
2054 * TCP_CORK or attempt at coalescing tiny packets. 2054 * TCP_CORK or attempt at coalescing tiny packets.
2055 * The socket must be locked by the caller. 2055 * The socket must be locked by the caller.
2056 */ 2056 */
2057 void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, 2057 void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
2058 int nonagle) 2058 int nonagle)
2059 { 2059 {
2060 /* If we are closed, the bytes will have to remain here. 2060 /* If we are closed, the bytes will have to remain here.
2061 * In time closedown will finish, we empty the write queue and 2061 * In time closedown will finish, we empty the write queue and
2062 * all will be happy. 2062 * all will be happy.
2063 */ 2063 */
2064 if (unlikely(sk->sk_state == TCP_CLOSE)) 2064 if (unlikely(sk->sk_state == TCP_CLOSE))
2065 return; 2065 return;
2066 2066
2067 if (tcp_write_xmit(sk, cur_mss, nonagle, 0, 2067 if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
2068 sk_gfp_atomic(sk, GFP_ATOMIC))) 2068 sk_gfp_atomic(sk, GFP_ATOMIC)))
2069 tcp_check_probe_timer(sk); 2069 tcp_check_probe_timer(sk);
2070 } 2070 }
2071 2071
2072 /* Send _single_ skb sitting at the send head. This function requires 2072 /* Send _single_ skb sitting at the send head. This function requires
2073 * true push pending frames to setup probe timer etc. 2073 * true push pending frames to setup probe timer etc.
2074 */ 2074 */
2075 void tcp_push_one(struct sock *sk, unsigned int mss_now) 2075 void tcp_push_one(struct sock *sk, unsigned int mss_now)
2076 { 2076 {
2077 struct sk_buff *skb = tcp_send_head(sk); 2077 struct sk_buff *skb = tcp_send_head(sk);
2078 2078
2079 BUG_ON(!skb || skb->len < mss_now); 2079 BUG_ON(!skb || skb->len < mss_now);
2080 2080
2081 tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation); 2081 tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
2082 } 2082 }
2083 2083
2084 /* This function returns the amount that we can raise the 2084 /* This function returns the amount that we can raise the
2085 * usable window based on the following constraints 2085 * usable window based on the following constraints
2086 * 2086 *
2087 * 1. The window can never be shrunk once it is offered (RFC 793) 2087 * 1. The window can never be shrunk once it is offered (RFC 793)
2088 * 2. We limit memory per socket 2088 * 2. We limit memory per socket
2089 * 2089 *
2090 * RFC 1122: 2090 * RFC 1122:
2091 * "the suggested [SWS] avoidance algorithm for the receiver is to keep 2091 * "the suggested [SWS] avoidance algorithm for the receiver is to keep
2092 * RECV.NEXT + RCV.WIN fixed until: 2092 * RECV.NEXT + RCV.WIN fixed until:
2093 * RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)" 2093 * RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
2094 * 2094 *
2095 * i.e. don't raise the right edge of the window until you can raise 2095 * i.e. don't raise the right edge of the window until you can raise
2096 * it at least MSS bytes. 2096 * it at least MSS bytes.
2097 * 2097 *
2098 * Unfortunately, the recommended algorithm breaks header prediction, 2098 * Unfortunately, the recommended algorithm breaks header prediction,
2099 * since header prediction assumes th->window stays fixed. 2099 * since header prediction assumes th->window stays fixed.
2100 * 2100 *
2101 * Strictly speaking, keeping th->window fixed violates the receiver 2101 * Strictly speaking, keeping th->window fixed violates the receiver
2102 * side SWS prevention criteria. The problem is that under this rule 2102 * side SWS prevention criteria. The problem is that under this rule
2103 * a stream of single byte packets will cause the right side of the 2103 * a stream of single byte packets will cause the right side of the
2104 * window to always advance by a single byte. 2104 * window to always advance by a single byte.
2105 * 2105 *
2106 * Of course, if the sender implements sender side SWS prevention 2106 * Of course, if the sender implements sender side SWS prevention
2107 * then this will not be a problem. 2107 * then this will not be a problem.
2108 * 2108 *
2109 * BSD seems to make the following compromise: 2109 * BSD seems to make the following compromise:
2110 * 2110 *
2111 * If the free space is less than the 1/4 of the maximum 2111 * If the free space is less than the 1/4 of the maximum
2112 * space available and the free space is less than 1/2 mss, 2112 * space available and the free space is less than 1/2 mss,
2113 * then set the window to 0. 2113 * then set the window to 0.
2114 * [ Actually, bsd uses MSS and 1/4 of maximal _window_ ] 2114 * [ Actually, bsd uses MSS and 1/4 of maximal _window_ ]
2115 * Otherwise, just prevent the window from shrinking 2115 * Otherwise, just prevent the window from shrinking
2116 * and from being larger than the largest representable value. 2116 * and from being larger than the largest representable value.
2117 * 2117 *
2118 * This prevents incremental opening of the window in the regime 2118 * This prevents incremental opening of the window in the regime
2119 * where TCP is limited by the speed of the reader side taking 2119 * where TCP is limited by the speed of the reader side taking
2120 * data out of the TCP receive queue. It does nothing about 2120 * data out of the TCP receive queue. It does nothing about
2121 * those cases where the window is constrained on the sender side 2121 * those cases where the window is constrained on the sender side
2122 * because the pipeline is full. 2122 * because the pipeline is full.
2123 * 2123 *
2124 * BSD also seems to "accidentally" limit itself to windows that are a 2124 * BSD also seems to "accidentally" limit itself to windows that are a
2125 * multiple of MSS, at least until the free space gets quite small. 2125 * multiple of MSS, at least until the free space gets quite small.
2126 * This would appear to be a side effect of the mbuf implementation. 2126 * This would appear to be a side effect of the mbuf implementation.
2127 * Combining these two algorithms results in the observed behavior 2127 * Combining these two algorithms results in the observed behavior
2128 * of having a fixed window size at almost all times. 2128 * of having a fixed window size at almost all times.
2129 * 2129 *
2130 * Below we obtain similar behavior by forcing the offered window to 2130 * Below we obtain similar behavior by forcing the offered window to
2131 * a multiple of the mss when it is feasible to do so. 2131 * a multiple of the mss when it is feasible to do so.
2132 * 2132 *
2133 * Note, we don't "adjust" for TIMESTAMP or SACK option bytes. 2133 * Note, we don't "adjust" for TIMESTAMP or SACK option bytes.
2134 * Regular options like TIMESTAMP are taken into account. 2134 * Regular options like TIMESTAMP are taken into account.
2135 */ 2135 */
2136 u32 __tcp_select_window(struct sock *sk) 2136 u32 __tcp_select_window(struct sock *sk)
2137 { 2137 {
2138 struct inet_connection_sock *icsk = inet_csk(sk); 2138 struct inet_connection_sock *icsk = inet_csk(sk);
2139 struct tcp_sock *tp = tcp_sk(sk); 2139 struct tcp_sock *tp = tcp_sk(sk);
2140 /* MSS for the peer's data. Previous versions used mss_clamp 2140 /* MSS for the peer's data. Previous versions used mss_clamp
2141 * here. I don't know if the value based on our guesses 2141 * here. I don't know if the value based on our guesses
2142 * of peer's MSS is better for the performance. It's more correct 2142 * of peer's MSS is better for the performance. It's more correct
2143 * but may be worse for the performance because of rcv_mss 2143 * but may be worse for the performance because of rcv_mss
2144 * fluctuations. --SAW 1998/11/1 2144 * fluctuations. --SAW 1998/11/1
2145 */ 2145 */
2146 int mss = icsk->icsk_ack.rcv_mss; 2146 int mss = icsk->icsk_ack.rcv_mss;
2147 int free_space = tcp_space(sk); 2147 int free_space = tcp_space(sk);
2148 int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk)); 2148 int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
2149 int window; 2149 int window;
2150 2150
2151 if (mss > full_space) 2151 if (mss > full_space)
2152 mss = full_space; 2152 mss = full_space;
2153 2153
2154 if (free_space < (full_space >> 1)) { 2154 if (free_space < (full_space >> 1)) {
2155 icsk->icsk_ack.quick = 0; 2155 icsk->icsk_ack.quick = 0;
2156 2156
2157 if (sk_under_memory_pressure(sk)) 2157 if (sk_under_memory_pressure(sk))
2158 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 2158 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
2159 4U * tp->advmss); 2159 4U * tp->advmss);
2160 2160
2161 if (free_space < mss) 2161 if (free_space < mss)
2162 return 0; 2162 return 0;
2163 } 2163 }
2164 2164
2165 if (free_space > tp->rcv_ssthresh) 2165 if (free_space > tp->rcv_ssthresh)
2166 free_space = tp->rcv_ssthresh; 2166 free_space = tp->rcv_ssthresh;
2167 2167
2168 /* Don't do rounding if we are using window scaling, since the 2168 /* Don't do rounding if we are using window scaling, since the
2169 * scaled window will not line up with the MSS boundary anyway. 2169 * scaled window will not line up with the MSS boundary anyway.
2170 */ 2170 */
2171 window = tp->rcv_wnd; 2171 window = tp->rcv_wnd;
2172 if (tp->rx_opt.rcv_wscale) { 2172 if (tp->rx_opt.rcv_wscale) {
2173 window = free_space; 2173 window = free_space;
2174 2174
2175 /* Advertise enough space so that it won't get scaled away. 2175 /* Advertise enough space so that it won't get scaled away.
2176 * Important case: prevent zero window announcement if 2176 * Important case: prevent zero window announcement if
2177 * 1<<rcv_wscale > mss. 2177 * 1<<rcv_wscale > mss.
2178 */ 2178 */
2179 if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window) 2179 if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
2180 window = (((window >> tp->rx_opt.rcv_wscale) + 1) 2180 window = (((window >> tp->rx_opt.rcv_wscale) + 1)
2181 << tp->rx_opt.rcv_wscale); 2181 << tp->rx_opt.rcv_wscale);
2182 } else { 2182 } else {
2183 /* Get the largest window that is a nice multiple of mss. 2183 /* Get the largest window that is a nice multiple of mss.
2184 * Window clamp already applied above. 2184 * Window clamp already applied above.
2185 * If our current window offering is within 1 mss of the 2185 * If our current window offering is within 1 mss of the
2186 * free space we just keep it. This prevents the divide 2186 * free space we just keep it. This prevents the divide
2187 * and multiply from happening most of the time. 2187 * and multiply from happening most of the time.
2188 * We also don't do any window rounding when the free space 2188 * We also don't do any window rounding when the free space
2189 * is too small. 2189 * is too small.
2190 */ 2190 */
2191 if (window <= free_space - mss || window > free_space) 2191 if (window <= free_space - mss || window > free_space)
2192 window = (free_space / mss) * mss; 2192 window = (free_space / mss) * mss;
2193 else if (mss == full_space && 2193 else if (mss == full_space &&
2194 free_space > window + (full_space >> 1)) 2194 free_space > window + (full_space >> 1))
2195 window = free_space; 2195 window = free_space;
2196 } 2196 }
2197 2197
2198 return window; 2198 return window;
2199 } 2199 }
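The two rounding branches at the end of __tcp_select_window() above behave quite differently: without window scaling the offer is rounded down to a multiple of the MSS unless the current offer is already within one MSS of free space, while with scaling the offer is rounded up to a multiple of 1 << rcv_wscale so a small window is not truncated to zero when advertised. Here is a user-space sketch with illustrative values (it omits the mss == full_space special case).

#include <stdio.h>

int main(void)
{
	int mss = 1460, free_space = 10000, cur_window = 5840, wscale = 7;
	int window;

	/* No window scaling: keep the current offer if it is within one MSS of
	 * free space, otherwise round free space down to a multiple of MSS. */
	window = cur_window;
	if (window <= free_space - mss || window > free_space)
		window = (free_space / mss) * mss;
	printf("unscaled offer: %d\n", window);

	/* Window scaling: round up to a multiple of 1 << wscale so the shifted
	 * advertisement of a small window does not become zero. */
	window = free_space;
	if (((window >> wscale) << wscale) != window)
		window = ((window >> wscale) + 1) << wscale;
	printf("scaled offer: %d (advertised as %d)\n", window, window >> wscale);
	return 0;
}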
2200 2200
2201 /* Collapses two adjacent SKB's during retransmission. */ 2201 /* Collapses two adjacent SKB's during retransmission. */
2202 static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) 2202 static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2203 { 2203 {
2204 struct tcp_sock *tp = tcp_sk(sk); 2204 struct tcp_sock *tp = tcp_sk(sk);
2205 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); 2205 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
2206 int skb_size, next_skb_size; 2206 int skb_size, next_skb_size;
2207 2207
2208 skb_size = skb->len; 2208 skb_size = skb->len;
2209 next_skb_size = next_skb->len; 2209 next_skb_size = next_skb->len;
2210 2210
2211 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); 2211 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
2212 2212
2213 tcp_highest_sack_combine(sk, next_skb, skb); 2213 tcp_highest_sack_combine(sk, next_skb, skb);
2214 2214
2215 tcp_unlink_write_queue(next_skb, sk); 2215 tcp_unlink_write_queue(next_skb, sk);
2216 2216
2217 skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size), 2217 skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
2218 next_skb_size); 2218 next_skb_size);
2219 2219
2220 if (next_skb->ip_summed == CHECKSUM_PARTIAL) 2220 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
2221 skb->ip_summed = CHECKSUM_PARTIAL; 2221 skb->ip_summed = CHECKSUM_PARTIAL;
2222 2222
2223 if (skb->ip_summed != CHECKSUM_PARTIAL) 2223 if (skb->ip_summed != CHECKSUM_PARTIAL)
2224 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size); 2224 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
2225 2225
2226 /* Update sequence range on original skb. */ 2226 /* Update sequence range on original skb. */
2227 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; 2227 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
2228 2228
2229 /* Merge over control information. This moves PSH/FIN etc. over */ 2229 /* Merge over control information. This moves PSH/FIN etc. over */
2230 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags; 2230 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
2231 2231
2232 /* All done, get rid of second SKB and account for it so 2232 /* All done, get rid of second SKB and account for it so
2233 * packet counting does not break. 2233 * packet counting does not break.
2234 */ 2234 */
2235 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS; 2235 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
2236 2236
2237 /* changed transmit queue under us so clear hints */ 2237 /* changed transmit queue under us so clear hints */
2238 tcp_clear_retrans_hints_partial(tp); 2238 tcp_clear_retrans_hints_partial(tp);
2239 if (next_skb == tp->retransmit_skb_hint) 2239 if (next_skb == tp->retransmit_skb_hint)
2240 tp->retransmit_skb_hint = skb; 2240 tp->retransmit_skb_hint = skb;
2241 2241
2242 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb)); 2242 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
2243 2243
2244 sk_wmem_free_skb(sk, next_skb); 2244 sk_wmem_free_skb(sk, next_skb);
2245 } 2245 }
2246 2246
2247 /* Check if coalescing SKBs is legal. */ 2247 /* Check if coalescing SKBs is legal. */
2248 static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb) 2248 static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2249 { 2249 {
2250 if (tcp_skb_pcount(skb) > 1) 2250 if (tcp_skb_pcount(skb) > 1)
2251 return false; 2251 return false;
2252 /* TODO: SACK collapsing could be used to remove this condition */ 2252 /* TODO: SACK collapsing could be used to remove this condition */
2253 if (skb_shinfo(skb)->nr_frags != 0) 2253 if (skb_shinfo(skb)->nr_frags != 0)
2254 return false; 2254 return false;
2255 if (skb_cloned(skb)) 2255 if (skb_cloned(skb))
2256 return false; 2256 return false;
2257 if (skb == tcp_send_head(sk)) 2257 if (skb == tcp_send_head(sk))
2258 return false; 2258 return false;
2259 /* Some heuristics for collapsing over SACK'd could be invented */ 2259 /* Some heuristics for collapsing over SACK'd could be invented */
2260 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) 2260 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2261 return false; 2261 return false;
2262 2262
2263 return true; 2263 return true;
2264 } 2264 }
2265 2265
2266 /* Collapse packets in the retransmit queue to create fewer 2266 /* Collapse packets in the retransmit queue to create fewer
2267 * packets on the wire. This is only done on retransmission. 2267 * packets on the wire. This is only done on retransmission.
2268 */ 2268 */
2269 static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, 2269 static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2270 int space) 2270 int space)
2271 { 2271 {
2272 struct tcp_sock *tp = tcp_sk(sk); 2272 struct tcp_sock *tp = tcp_sk(sk);
2273 struct sk_buff *skb = to, *tmp; 2273 struct sk_buff *skb = to, *tmp;
2274 bool first = true; 2274 bool first = true;
2275 2275
2276 if (!sysctl_tcp_retrans_collapse) 2276 if (!sysctl_tcp_retrans_collapse)
2277 return; 2277 return;
2278 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) 2278 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2279 return; 2279 return;
2280 2280
2281 tcp_for_write_queue_from_safe(skb, tmp, sk) { 2281 tcp_for_write_queue_from_safe(skb, tmp, sk) {
2282 if (!tcp_can_collapse(sk, skb)) 2282 if (!tcp_can_collapse(sk, skb))
2283 break; 2283 break;
2284 2284
2285 space -= skb->len; 2285 space -= skb->len;
2286 2286
2287 if (first) { 2287 if (first) {
2288 first = false; 2288 first = false;
2289 continue; 2289 continue;
2290 } 2290 }
2291 2291
2292 if (space < 0) 2292 if (space < 0)
2293 break; 2293 break;
2294 /* Punt if not enough space exists in the first SKB for 2294 /* Punt if not enough space exists in the first SKB for
2295 * the data in the second 2295 * the data in the second
2296 */ 2296 */
2297 if (skb->len > skb_availroom(to)) 2297 if (skb->len > skb_availroom(to))
2298 break; 2298 break;
2299 2299
2300 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) 2300 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
2301 break; 2301 break;
2302 2302
2303 tcp_collapse_retrans(sk, to); 2303 tcp_collapse_retrans(sk, to);
2304 } 2304 }
2305 } 2305 }
2306 2306
2307 /* This retransmits one SKB. Policy decisions and retransmit queue 2307 /* This retransmits one SKB. Policy decisions and retransmit queue
2308 * state updates are done by the caller. Returns non-zero if an 2308 * state updates are done by the caller. Returns non-zero if an
2309 * error occurred which prevented the send. 2309 * error occurred which prevented the send.
2310 */ 2310 */
2311 int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) 2311 int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2312 { 2312 {
2313 struct tcp_sock *tp = tcp_sk(sk); 2313 struct tcp_sock *tp = tcp_sk(sk);
2314 struct inet_connection_sock *icsk = inet_csk(sk); 2314 struct inet_connection_sock *icsk = inet_csk(sk);
2315 unsigned int cur_mss; 2315 unsigned int cur_mss;
2316 2316
2317 /* Inconclusive MTU probe */ 2317 /* Inconclusive MTU probe */
2318 if (icsk->icsk_mtup.probe_size) { 2318 if (icsk->icsk_mtup.probe_size) {
2319 icsk->icsk_mtup.probe_size = 0; 2319 icsk->icsk_mtup.probe_size = 0;
2320 } 2320 }
2321 2321
2322 /* Do not send more than we queued. 1/4 is reserved for possible 2322 /* Do not send more than we queued. 1/4 is reserved for possible
2323 * copying overhead: fragmentation, tunneling, mangling etc. 2323 * copying overhead: fragmentation, tunneling, mangling etc.
2324 */ 2324 */
2325 if (atomic_read(&sk->sk_wmem_alloc) > 2325 if (atomic_read(&sk->sk_wmem_alloc) >
2326 min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) 2326 min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
2327 return -EAGAIN; 2327 return -EAGAIN;
2328 2328
2329 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { 2329 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
2330 if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) 2330 if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
2331 BUG(); 2331 BUG();
2332 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) 2332 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
2333 return -ENOMEM; 2333 return -ENOMEM;
2334 } 2334 }
2335 2335
2336 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) 2336 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
2337 return -EHOSTUNREACH; /* Routing failure or similar. */ 2337 return -EHOSTUNREACH; /* Routing failure or similar. */
2338 2338
2339 cur_mss = tcp_current_mss(sk); 2339 cur_mss = tcp_current_mss(sk);
2340 2340
2341 /* If receiver has shrunk his window, and skb is out of 2341 /* If receiver has shrunk his window, and skb is out of
2342 * new window, do not retransmit it. The exception is the 2342 * new window, do not retransmit it. The exception is the
2343 * case when the window is shrunk to zero. In this case 2343 * case when the window is shrunk to zero. In this case
2344 * our retransmit serves as a zero window probe. 2344 * our retransmit serves as a zero window probe.
2345 */ 2345 */
2346 if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) && 2346 if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
2347 TCP_SKB_CB(skb)->seq != tp->snd_una) 2347 TCP_SKB_CB(skb)->seq != tp->snd_una)
2348 return -EAGAIN; 2348 return -EAGAIN;
2349 2349
2350 if (skb->len > cur_mss) { 2350 if (skb->len > cur_mss) {
2351 if (tcp_fragment(sk, skb, cur_mss, cur_mss)) 2351 if (tcp_fragment(sk, skb, cur_mss, cur_mss))
2352 return -ENOMEM; /* We'll try again later. */ 2352 return -ENOMEM; /* We'll try again later. */
2353 } else { 2353 } else {
2354 int oldpcount = tcp_skb_pcount(skb); 2354 int oldpcount = tcp_skb_pcount(skb);
2355 2355
2356 if (unlikely(oldpcount > 1)) { 2356 if (unlikely(oldpcount > 1)) {
2357 if (skb_unclone(skb, GFP_ATOMIC)) 2357 if (skb_unclone(skb, GFP_ATOMIC))
2358 return -ENOMEM; 2358 return -ENOMEM;
2359 tcp_init_tso_segs(sk, skb, cur_mss); 2359 tcp_init_tso_segs(sk, skb, cur_mss);
2360 tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb)); 2360 tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
2361 } 2361 }
2362 } 2362 }
2363 2363
2364 tcp_retrans_try_collapse(sk, skb, cur_mss); 2364 tcp_retrans_try_collapse(sk, skb, cur_mss);
2365 2365
2366 /* Make a copy if the first transmission SKB clone we made 2366 /* Make a copy if the first transmission SKB clone we made
2367 * is still in somebody's hands; otherwise make a clone. 2367 * is still in somebody's hands; otherwise make a clone.
2368 */ 2368 */
2369 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2369 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2370 2370
2371 /* make sure skb->data is aligned on arches that require it 2371 /* make sure skb->data is aligned on arches that require it
2372 * and check if ack-trimming & collapsing extended the headroom 2372 * and check if ack-trimming & collapsing extended the headroom
2373 * beyond what csum_start can cover. 2373 * beyond what csum_start can cover.
2374 */ 2374 */
2375 if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || 2375 if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
2376 skb_headroom(skb) >= 0xFFFF)) { 2376 skb_headroom(skb) >= 0xFFFF)) {
2377 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, 2377 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
2378 GFP_ATOMIC); 2378 GFP_ATOMIC);
2379 return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : 2379 return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2380 -ENOBUFS; 2380 -ENOBUFS;
2381 } else { 2381 } else {
2382 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2382 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2383 } 2383 }
2384 } 2384 }
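
For reference, a minimal userspace sketch (not kernel code) of the send-buffer budget check at the top of __tcp_retransmit_skb(): a retransmit is allowed only while allocated write memory stays under min(queued + queued/4, sndbuf). The helper name and the byte values are illustrative assumptions.

#include <stdbool.h>
#include <stdio.h>

static bool retransmit_within_budget(unsigned int wmem_alloc,
				     unsigned int wmem_queued,
				     unsigned int sndbuf)
{
	/* 1/4 of the queued bytes is reserved for copying overhead. */
	unsigned int limit = wmem_queued + (wmem_queued >> 2);

	if (limit > sndbuf)
		limit = sndbuf;
	return wmem_alloc <= limit;
}

int main(void)
{
	/* 100 KB queued, 160 KB send buffer => limit is 125 KB. */
	printf("%d\n", retransmit_within_budget(120 * 1024, 100 * 1024, 160 * 1024)); /* 1 */
	printf("%d\n", retransmit_within_budget(130 * 1024, 100 * 1024, 160 * 1024)); /* 0 */
	return 0;
}
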
2385 2385
2386 int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) 2386 int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2387 { 2387 {
2388 struct tcp_sock *tp = tcp_sk(sk); 2388 struct tcp_sock *tp = tcp_sk(sk);
2389 int err = __tcp_retransmit_skb(sk, skb); 2389 int err = __tcp_retransmit_skb(sk, skb);
2390 2390
2391 if (err == 0) { 2391 if (err == 0) {
2392 /* Update global TCP statistics. */ 2392 /* Update global TCP statistics. */
2393 TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); 2393 TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
2394 2394
2395 tp->total_retrans++; 2395 tp->total_retrans++;
2396 2396
2397 #if FASTRETRANS_DEBUG > 0 2397 #if FASTRETRANS_DEBUG > 0
2398 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { 2398 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2399 net_dbg_ratelimited("retrans_out leaked\n"); 2399 net_dbg_ratelimited("retrans_out leaked\n");
2400 } 2400 }
2401 #endif 2401 #endif
2402 if (!tp->retrans_out) 2402 if (!tp->retrans_out)
2403 tp->lost_retrans_low = tp->snd_nxt; 2403 tp->lost_retrans_low = tp->snd_nxt;
2404 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; 2404 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
2405 tp->retrans_out += tcp_skb_pcount(skb); 2405 tp->retrans_out += tcp_skb_pcount(skb);
2406 2406
2407 /* Save stamp of the first retransmit. */ 2407 /* Save stamp of the first retransmit. */
2408 if (!tp->retrans_stamp) 2408 if (!tp->retrans_stamp)
2409 tp->retrans_stamp = TCP_SKB_CB(skb)->when; 2409 tp->retrans_stamp = TCP_SKB_CB(skb)->when;
2410 2410
2411 tp->undo_retrans += tcp_skb_pcount(skb); 2411 tp->undo_retrans += tcp_skb_pcount(skb);
2412 2412
2413 /* snd_nxt is stored to detect loss of retransmitted segment, 2413 /* snd_nxt is stored to detect loss of retransmitted segment,
2414 * see tcp_input.c tcp_sacktag_write_queue(). 2414 * see tcp_input.c tcp_sacktag_write_queue().
2415 */ 2415 */
2416 TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt; 2416 TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
2417 } else { 2417 } else {
2418 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); 2418 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
2419 } 2419 }
2420 return err; 2420 return err;
2421 } 2421 }
2422 2422
2423 /* Check if forward retransmits are possible in the current 2423 /* Check if forward retransmits are possible in the current
2424 * window/congestion state. 2424 * window/congestion state.
2425 */ 2425 */
2426 static bool tcp_can_forward_retransmit(struct sock *sk) 2426 static bool tcp_can_forward_retransmit(struct sock *sk)
2427 { 2427 {
2428 const struct inet_connection_sock *icsk = inet_csk(sk); 2428 const struct inet_connection_sock *icsk = inet_csk(sk);
2429 const struct tcp_sock *tp = tcp_sk(sk); 2429 const struct tcp_sock *tp = tcp_sk(sk);
2430 2430
2431 /* Forward retransmissions are possible only during Recovery. */ 2431 /* Forward retransmissions are possible only during Recovery. */
2432 if (icsk->icsk_ca_state != TCP_CA_Recovery) 2432 if (icsk->icsk_ca_state != TCP_CA_Recovery)
2433 return false; 2433 return false;
2434 2434
2435 /* No forward retransmissions in Reno are possible. */ 2435 /* No forward retransmissions in Reno are possible. */
2436 if (tcp_is_reno(tp)) 2436 if (tcp_is_reno(tp))
2437 return false; 2437 return false;
2438 2438
2439 /* Yeah, we have to make a difficult choice between forward transmission 2439 /* Yeah, we have to make a difficult choice between forward transmission
2440 * and retransmission... Both ways have their merits... 2440 * and retransmission... Both ways have their merits...
2441 * 2441 *
2442 * For now we do not retransmit anything, while we have some new 2442 * For now we do not retransmit anything, while we have some new
2443 * segments to send. In the other cases, follow rule 3 for 2443 * segments to send. In the other cases, follow rule 3 for
2444 * NextSeg() specified in RFC3517. 2444 * NextSeg() specified in RFC3517.
2445 */ 2445 */
2446 2446
2447 if (tcp_may_send_now(sk)) 2447 if (tcp_may_send_now(sk))
2448 return false; 2448 return false;
2449 2449
2450 return true; 2450 return true;
2451 } 2451 }
2452 2452
2453 /* This gets called after a retransmit timeout, and the initially 2453 /* This gets called after a retransmit timeout, and the initially
2454 * retransmitted data is acknowledged. It tries to continue 2454 * retransmitted data is acknowledged. It tries to continue
2455 * resending the rest of the retransmit queue, until either 2455 * resending the rest of the retransmit queue, until either
2456 * we've sent it all or the congestion window limit is reached. 2456 * we've sent it all or the congestion window limit is reached.
2457 * If doing SACK, the first ACK which comes back for a timeout 2457 * If doing SACK, the first ACK which comes back for a timeout
2458 * based retransmit packet might feed us FACK information again. 2458 * based retransmit packet might feed us FACK information again.
2459 * If so, we use it to avoid unnecessary retransmissions. 2459 * If so, we use it to avoid unnecessary retransmissions.
2460 */ 2460 */
2461 void tcp_xmit_retransmit_queue(struct sock *sk) 2461 void tcp_xmit_retransmit_queue(struct sock *sk)
2462 { 2462 {
2463 const struct inet_connection_sock *icsk = inet_csk(sk); 2463 const struct inet_connection_sock *icsk = inet_csk(sk);
2464 struct tcp_sock *tp = tcp_sk(sk); 2464 struct tcp_sock *tp = tcp_sk(sk);
2465 struct sk_buff *skb; 2465 struct sk_buff *skb;
2466 struct sk_buff *hole = NULL; 2466 struct sk_buff *hole = NULL;
2467 u32 last_lost; 2467 u32 last_lost;
2468 int mib_idx; 2468 int mib_idx;
2469 int fwd_rexmitting = 0; 2469 int fwd_rexmitting = 0;
2470 2470
2471 if (!tp->packets_out) 2471 if (!tp->packets_out)
2472 return; 2472 return;
2473 2473
2474 if (!tp->lost_out) 2474 if (!tp->lost_out)
2475 tp->retransmit_high = tp->snd_una; 2475 tp->retransmit_high = tp->snd_una;
2476 2476
2477 if (tp->retransmit_skb_hint) { 2477 if (tp->retransmit_skb_hint) {
2478 skb = tp->retransmit_skb_hint; 2478 skb = tp->retransmit_skb_hint;
2479 last_lost = TCP_SKB_CB(skb)->end_seq; 2479 last_lost = TCP_SKB_CB(skb)->end_seq;
2480 if (after(last_lost, tp->retransmit_high)) 2480 if (after(last_lost, tp->retransmit_high))
2481 last_lost = tp->retransmit_high; 2481 last_lost = tp->retransmit_high;
2482 } else { 2482 } else {
2483 skb = tcp_write_queue_head(sk); 2483 skb = tcp_write_queue_head(sk);
2484 last_lost = tp->snd_una; 2484 last_lost = tp->snd_una;
2485 } 2485 }
2486 2486
2487 tcp_for_write_queue_from(skb, sk) { 2487 tcp_for_write_queue_from(skb, sk) {
2488 __u8 sacked = TCP_SKB_CB(skb)->sacked; 2488 __u8 sacked = TCP_SKB_CB(skb)->sacked;
2489 2489
2490 if (skb == tcp_send_head(sk)) 2490 if (skb == tcp_send_head(sk))
2491 break; 2491 break;
2492 /* we could do better than to assign each time */ 2492 /* we could do better than to assign each time */
2493 if (hole == NULL) 2493 if (hole == NULL)
2494 tp->retransmit_skb_hint = skb; 2494 tp->retransmit_skb_hint = skb;
2495 2495
2496 /* Assume this retransmit will generate 2496 /* Assume this retransmit will generate
2497 * only one packet for congestion window 2497 * only one packet for congestion window
2498 * calculation purposes. This works because 2498 * calculation purposes. This works because
2499 * tcp_retransmit_skb() will chop up the 2499 * tcp_retransmit_skb() will chop up the
2500 * packet to be MSS sized and all the 2500 * packet to be MSS sized and all the
2501 * packet counting works out. 2501 * packet counting works out.
2502 */ 2502 */
2503 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) 2503 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
2504 return; 2504 return;
2505 2505
2506 if (fwd_rexmitting) { 2506 if (fwd_rexmitting) {
2507 begin_fwd: 2507 begin_fwd:
2508 if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) 2508 if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
2509 break; 2509 break;
2510 mib_idx = LINUX_MIB_TCPFORWARDRETRANS; 2510 mib_idx = LINUX_MIB_TCPFORWARDRETRANS;
2511 2511
2512 } else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) { 2512 } else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) {
2513 tp->retransmit_high = last_lost; 2513 tp->retransmit_high = last_lost;
2514 if (!tcp_can_forward_retransmit(sk)) 2514 if (!tcp_can_forward_retransmit(sk))
2515 break; 2515 break;
2516 /* Backtrack if necessary to non-L'ed skb */ 2516 /* Backtrack if necessary to non-L'ed skb */
2517 if (hole != NULL) { 2517 if (hole != NULL) {
2518 skb = hole; 2518 skb = hole;
2519 hole = NULL; 2519 hole = NULL;
2520 } 2520 }
2521 fwd_rexmitting = 1; 2521 fwd_rexmitting = 1;
2522 goto begin_fwd; 2522 goto begin_fwd;
2523 2523
2524 } else if (!(sacked & TCPCB_LOST)) { 2524 } else if (!(sacked & TCPCB_LOST)) {
2525 if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED))) 2525 if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
2526 hole = skb; 2526 hole = skb;
2527 continue; 2527 continue;
2528 2528
2529 } else { 2529 } else {
2530 last_lost = TCP_SKB_CB(skb)->end_seq; 2530 last_lost = TCP_SKB_CB(skb)->end_seq;
2531 if (icsk->icsk_ca_state != TCP_CA_Loss) 2531 if (icsk->icsk_ca_state != TCP_CA_Loss)
2532 mib_idx = LINUX_MIB_TCPFASTRETRANS; 2532 mib_idx = LINUX_MIB_TCPFASTRETRANS;
2533 else 2533 else
2534 mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS; 2534 mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
2535 } 2535 }
2536 2536
2537 if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) 2537 if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
2538 continue; 2538 continue;
2539 2539
2540 if (tcp_retransmit_skb(sk, skb)) 2540 if (tcp_retransmit_skb(sk, skb))
2541 return; 2541 return;
2542 2542
2543 NET_INC_STATS_BH(sock_net(sk), mib_idx); 2543 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2544 2544
2545 if (tcp_in_cwnd_reduction(sk)) 2545 if (tcp_in_cwnd_reduction(sk))
2546 tp->prr_out += tcp_skb_pcount(skb); 2546 tp->prr_out += tcp_skb_pcount(skb);
2547 2547
2548 if (skb == tcp_write_queue_head(sk)) 2548 if (skb == tcp_write_queue_head(sk))
2549 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 2549 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2550 inet_csk(sk)->icsk_rto, 2550 inet_csk(sk)->icsk_rto,
2551 TCP_RTO_MAX); 2551 TCP_RTO_MAX);
2552 } 2552 }
2553 } 2553 }
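
A rough sketch of the congestion-window gate inside the loop above: tcp_xmit_retransmit_queue() keeps retransmitting only while the packets already in flight stay below snd_cwnd. The numbers below are illustrative, not taken from the code.

#include <stdio.h>

int main(void)
{
	unsigned int snd_cwnd = 10;	/* assumed congestion window (packets) */
	unsigned int in_flight = 7;	/* assumed packets currently outstanding */
	unsigned int lost = 6;		/* segments marked lost and eligible */
	unsigned int sent;

	/* Each retransmit is assumed to add one packet in flight. */
	for (sent = 0; sent < lost && in_flight < snd_cwnd; sent++)
		in_flight++;

	printf("retransmitted %u of %u lost segments\n", sent, lost); /* 3 of 6 */
	return 0;
}
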
2554 2554
2555 /* Send a fin. The caller locks the socket for us. This cannot be 2555 /* Send a fin. The caller locks the socket for us. This cannot be
2556 * allowed to fail queueing a FIN frame under any circumstances. 2556 * allowed to fail queueing a FIN frame under any circumstances.
2557 */ 2557 */
2558 void tcp_send_fin(struct sock *sk) 2558 void tcp_send_fin(struct sock *sk)
2559 { 2559 {
2560 struct tcp_sock *tp = tcp_sk(sk); 2560 struct tcp_sock *tp = tcp_sk(sk);
2561 struct sk_buff *skb = tcp_write_queue_tail(sk); 2561 struct sk_buff *skb = tcp_write_queue_tail(sk);
2562 int mss_now; 2562 int mss_now;
2563 2563
2564 /* Optimization, tack on the FIN if we have a queue of 2564 /* Optimization, tack on the FIN if we have a queue of
2565 * unsent frames. But be careful about outgoing SACKS 2565 * unsent frames. But be careful about outgoing SACKS
2566 * and IP options. 2566 * and IP options.
2567 */ 2567 */
2568 mss_now = tcp_current_mss(sk); 2568 mss_now = tcp_current_mss(sk);
2569 2569
2570 if (tcp_send_head(sk) != NULL) { 2570 if (tcp_send_head(sk) != NULL) {
2571 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; 2571 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
2572 TCP_SKB_CB(skb)->end_seq++; 2572 TCP_SKB_CB(skb)->end_seq++;
2573 tp->write_seq++; 2573 tp->write_seq++;
2574 } else { 2574 } else {
2575 /* Socket is locked, keep trying until memory is available. */ 2575 /* Socket is locked, keep trying until memory is available. */
2576 for (;;) { 2576 for (;;) {
2577 skb = alloc_skb_fclone(MAX_TCP_HEADER, 2577 skb = alloc_skb_fclone(MAX_TCP_HEADER,
2578 sk->sk_allocation); 2578 sk->sk_allocation);
2579 if (skb) 2579 if (skb)
2580 break; 2580 break;
2581 yield(); 2581 yield();
2582 } 2582 }
2583 2583
2584 /* Reserve space for headers and prepare control bits. */ 2584 /* Reserve space for headers and prepare control bits. */
2585 skb_reserve(skb, MAX_TCP_HEADER); 2585 skb_reserve(skb, MAX_TCP_HEADER);
2586 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ 2586 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
2587 tcp_init_nondata_skb(skb, tp->write_seq, 2587 tcp_init_nondata_skb(skb, tp->write_seq,
2588 TCPHDR_ACK | TCPHDR_FIN); 2588 TCPHDR_ACK | TCPHDR_FIN);
2589 tcp_queue_skb(sk, skb); 2589 tcp_queue_skb(sk, skb);
2590 } 2590 }
2591 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); 2591 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
2592 } 2592 }
2593 2593
2594 /* We get here when a process closes a file descriptor (either due to 2594 /* We get here when a process closes a file descriptor (either due to
2595 * an explicit close() or as a byproduct of exit()'ing) and there 2595 * an explicit close() or as a byproduct of exit()'ing) and there
2596 * was unread data in the receive queue. This behavior is recommended 2596 * was unread data in the receive queue. This behavior is recommended
2597 * by RFC 2525, section 2.17. -DaveM 2597 * by RFC 2525, section 2.17. -DaveM
2598 */ 2598 */
2599 void tcp_send_active_reset(struct sock *sk, gfp_t priority) 2599 void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2600 { 2600 {
2601 struct sk_buff *skb; 2601 struct sk_buff *skb;
2602 2602
2603 /* NOTE: No TCP options attached and we never retransmit this. */ 2603 /* NOTE: No TCP options attached and we never retransmit this. */
2604 skb = alloc_skb(MAX_TCP_HEADER, priority); 2604 skb = alloc_skb(MAX_TCP_HEADER, priority);
2605 if (!skb) { 2605 if (!skb) {
2606 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); 2606 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2607 return; 2607 return;
2608 } 2608 }
2609 2609
2610 /* Reserve space for headers and prepare control bits. */ 2610 /* Reserve space for headers and prepare control bits. */
2611 skb_reserve(skb, MAX_TCP_HEADER); 2611 skb_reserve(skb, MAX_TCP_HEADER);
2612 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), 2612 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
2613 TCPHDR_ACK | TCPHDR_RST); 2613 TCPHDR_ACK | TCPHDR_RST);
2614 /* Send it off. */ 2614 /* Send it off. */
2615 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2615 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2616 if (tcp_transmit_skb(sk, skb, 0, priority)) 2616 if (tcp_transmit_skb(sk, skb, 0, priority))
2617 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); 2617 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2618 2618
2619 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS); 2619 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
2620 } 2620 }
2621 2621
2622 /* Send a crossed SYN-ACK during socket establishment. 2622 /* Send a crossed SYN-ACK during socket establishment.
2623 * WARNING: This routine must only be called when we have already sent 2623 * WARNING: This routine must only be called when we have already sent
2624 * a SYN packet that crossed the incoming SYN that caused this routine 2624 * a SYN packet that crossed the incoming SYN that caused this routine
2625 * to get called. If this assumption fails then the initial rcv_wnd 2625 * to get called. If this assumption fails then the initial rcv_wnd
2626 * and rcv_wscale values will not be correct. 2626 * and rcv_wscale values will not be correct.
2627 */ 2627 */
2628 int tcp_send_synack(struct sock *sk) 2628 int tcp_send_synack(struct sock *sk)
2629 { 2629 {
2630 struct sk_buff *skb; 2630 struct sk_buff *skb;
2631 2631
2632 skb = tcp_write_queue_head(sk); 2632 skb = tcp_write_queue_head(sk);
2633 if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { 2633 if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
2634 pr_debug("%s: wrong queue state\n", __func__); 2634 pr_debug("%s: wrong queue state\n", __func__);
2635 return -EFAULT; 2635 return -EFAULT;
2636 } 2636 }
2637 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { 2637 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
2638 if (skb_cloned(skb)) { 2638 if (skb_cloned(skb)) {
2639 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); 2639 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2640 if (nskb == NULL) 2640 if (nskb == NULL)
2641 return -ENOMEM; 2641 return -ENOMEM;
2642 tcp_unlink_write_queue(skb, sk); 2642 tcp_unlink_write_queue(skb, sk);
2643 skb_header_release(nskb); 2643 skb_header_release(nskb);
2644 __tcp_add_write_queue_head(sk, nskb); 2644 __tcp_add_write_queue_head(sk, nskb);
2645 sk_wmem_free_skb(sk, skb); 2645 sk_wmem_free_skb(sk, skb);
2646 sk->sk_wmem_queued += nskb->truesize; 2646 sk->sk_wmem_queued += nskb->truesize;
2647 sk_mem_charge(sk, nskb->truesize); 2647 sk_mem_charge(sk, nskb->truesize);
2648 skb = nskb; 2648 skb = nskb;
2649 } 2649 }
2650 2650
2651 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK; 2651 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
2652 TCP_ECN_send_synack(tcp_sk(sk), skb); 2652 TCP_ECN_send_synack(tcp_sk(sk), skb);
2653 } 2653 }
2654 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2654 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2655 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2655 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2656 } 2656 }
2657 2657
2658 /** 2658 /**
2659 * tcp_make_synack - Prepare a SYN-ACK. 2659 * tcp_make_synack - Prepare a SYN-ACK.
2660 * sk: listener socket 2660 * sk: listener socket
2661 * dst: dst entry attached to the SYNACK 2661 * dst: dst entry attached to the SYNACK
2662 * req: request_sock pointer 2662 * req: request_sock pointer
2663 * 2663 *
2664 * Allocate one skb and build a SYNACK packet. 2664 * Allocate one skb and build a SYNACK packet.
2665 * @dst is consumed : Caller should not use it again. 2665 * @dst is consumed : Caller should not use it again.
2666 */ 2666 */
2667 struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, 2667 struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2668 struct request_sock *req, 2668 struct request_sock *req,
2669 struct tcp_fastopen_cookie *foc) 2669 struct tcp_fastopen_cookie *foc)
2670 { 2670 {
2671 struct tcp_out_options opts; 2671 struct tcp_out_options opts;
2672 struct inet_request_sock *ireq = inet_rsk(req); 2672 struct inet_request_sock *ireq = inet_rsk(req);
2673 struct tcp_sock *tp = tcp_sk(sk); 2673 struct tcp_sock *tp = tcp_sk(sk);
2674 struct tcphdr *th; 2674 struct tcphdr *th;
2675 struct sk_buff *skb; 2675 struct sk_buff *skb;
2676 struct tcp_md5sig_key *md5; 2676 struct tcp_md5sig_key *md5;
2677 int tcp_header_size; 2677 int tcp_header_size;
2678 int mss; 2678 int mss;
2679 2679
2680 skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); 2680 skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
2681 if (unlikely(!skb)) { 2681 if (unlikely(!skb)) {
2682 dst_release(dst); 2682 dst_release(dst);
2683 return NULL; 2683 return NULL;
2684 } 2684 }
2685 /* Reserve space for headers. */ 2685 /* Reserve space for headers. */
2686 skb_reserve(skb, MAX_TCP_HEADER); 2686 skb_reserve(skb, MAX_TCP_HEADER);
2687 2687
2688 skb_dst_set(skb, dst); 2688 skb_dst_set(skb, dst);
2689 security_skb_owned_by(skb, sk); 2689 security_skb_owned_by(skb, sk);
2690 2690
2691 mss = dst_metric_advmss(dst); 2691 mss = dst_metric_advmss(dst);
2692 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) 2692 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
2693 mss = tp->rx_opt.user_mss; 2693 mss = tp->rx_opt.user_mss;
2694 2694
2695 if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ 2695 if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
2696 __u8 rcv_wscale; 2696 __u8 rcv_wscale;
2697 /* Set this up on the first call only */ 2697 /* Set this up on the first call only */
2698 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); 2698 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
2699 2699
2700 /* limit the window selection if the user enforces a smaller rx buffer */ 2700 /* limit the window selection if the user enforces a smaller rx buffer */
2701 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && 2701 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
2702 (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0)) 2702 (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
2703 req->window_clamp = tcp_full_space(sk); 2703 req->window_clamp = tcp_full_space(sk);
2704 2704
2705 /* tcp_full_space because it is guaranteed to be the first packet */ 2705 /* tcp_full_space because it is guaranteed to be the first packet */
2706 tcp_select_initial_window(tcp_full_space(sk), 2706 tcp_select_initial_window(tcp_full_space(sk),
2707 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), 2707 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
2708 &req->rcv_wnd, 2708 &req->rcv_wnd,
2709 &req->window_clamp, 2709 &req->window_clamp,
2710 ireq->wscale_ok, 2710 ireq->wscale_ok,
2711 &rcv_wscale, 2711 &rcv_wscale,
2712 dst_metric(dst, RTAX_INITRWND)); 2712 dst_metric(dst, RTAX_INITRWND));
2713 ireq->rcv_wscale = rcv_wscale; 2713 ireq->rcv_wscale = rcv_wscale;
2714 } 2714 }
2715 2715
2716 memset(&opts, 0, sizeof(opts)); 2716 memset(&opts, 0, sizeof(opts));
2717 #ifdef CONFIG_SYN_COOKIES 2717 #ifdef CONFIG_SYN_COOKIES
2718 if (unlikely(req->cookie_ts)) 2718 if (unlikely(req->cookie_ts))
2719 TCP_SKB_CB(skb)->when = cookie_init_timestamp(req); 2719 TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
2720 else 2720 else
2721 #endif 2721 #endif
2722 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2722 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2723 tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, 2723 tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5,
2724 foc) + sizeof(*th); 2724 foc) + sizeof(*th);
2725 2725
2726 skb_push(skb, tcp_header_size); 2726 skb_push(skb, tcp_header_size);
2727 skb_reset_transport_header(skb); 2727 skb_reset_transport_header(skb);
2728 2728
2729 th = tcp_hdr(skb); 2729 th = tcp_hdr(skb);
2730 memset(th, 0, sizeof(struct tcphdr)); 2730 memset(th, 0, sizeof(struct tcphdr));
2731 th->syn = 1; 2731 th->syn = 1;
2732 th->ack = 1; 2732 th->ack = 1;
2733 TCP_ECN_make_synack(req, th); 2733 TCP_ECN_make_synack(req, th);
2734 th->source = htons(ireq->ir_num); 2734 th->source = htons(ireq->ir_num);
2735 th->dest = ireq->ir_rmt_port; 2735 th->dest = ireq->ir_rmt_port;
2736 /* Setting of flags is superfluous here for callers (and ECE is 2736 /* Setting of flags is superfluous here for callers (and ECE is
2737 * not even correctly set) 2737 * not even correctly set)
2738 */ 2738 */
2739 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, 2739 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
2740 TCPHDR_SYN | TCPHDR_ACK); 2740 TCPHDR_SYN | TCPHDR_ACK);
2741 2741
2742 th->seq = htonl(TCP_SKB_CB(skb)->seq); 2742 th->seq = htonl(TCP_SKB_CB(skb)->seq);
2743 /* XXX data is queued and acked as is. No buffer/window check */ 2743 /* XXX data is queued and acked as is. No buffer/window check */
2744 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); 2744 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
2745 2745
2746 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ 2746 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
2747 th->window = htons(min(req->rcv_wnd, 65535U)); 2747 th->window = htons(min(req->rcv_wnd, 65535U));
2748 tcp_options_write((__be32 *)(th + 1), tp, &opts); 2748 tcp_options_write((__be32 *)(th + 1), tp, &opts);
2749 th->doff = (tcp_header_size >> 2); 2749 th->doff = (tcp_header_size >> 2);
2750 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb)); 2750 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb));
2751 2751
2752 #ifdef CONFIG_TCP_MD5SIG 2752 #ifdef CONFIG_TCP_MD5SIG
2753 /* Okay, we have all we need - do the md5 hash if needed */ 2753 /* Okay, we have all we need - do the md5 hash if needed */
2754 if (md5) { 2754 if (md5) {
2755 tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location, 2755 tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
2756 md5, NULL, req, skb); 2756 md5, NULL, req, skb);
2757 } 2757 }
2758 #endif 2758 #endif
2759 2759
2760 return skb; 2760 return skb;
2761 } 2761 }
2762 EXPORT_SYMBOL(tcp_make_synack); 2762 EXPORT_SYMBOL(tcp_make_synack);
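
A small sketch of the RFC 1323 rule applied a few lines above: the window field in a SYN or SYN-ACK is never scaled, so whatever receive window was chosen is clamped to the 16-bit maximum of 65535 before it goes on the wire. The window values used here are made up for illustration.

#include <stdio.h>

static unsigned int synack_window(unsigned int rcv_wnd)
{
	/* SYN and SYN-ACK segments carry an unscaled 16-bit window. */
	return rcv_wnd < 65535U ? rcv_wnd : 65535U;
}

int main(void)
{
	printf("%u\n", synack_window(29200));	/* fits as-is: 29200 */
	printf("%u\n", synack_window(262144));	/* clamped: 65535 */
	return 0;
}
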
2763 2763
2764 /* Do all connect socket setups that can be done AF independent. */ 2764 /* Do all connect socket setups that can be done AF independent. */
2765 void tcp_connect_init(struct sock *sk) 2765 void tcp_connect_init(struct sock *sk)
2766 { 2766 {
2767 const struct dst_entry *dst = __sk_dst_get(sk); 2767 const struct dst_entry *dst = __sk_dst_get(sk);
2768 struct tcp_sock *tp = tcp_sk(sk); 2768 struct tcp_sock *tp = tcp_sk(sk);
2769 __u8 rcv_wscale; 2769 __u8 rcv_wscale;
2770 2770
2771 /* We'll fix this up when we get a response from the other end. 2771 /* We'll fix this up when we get a response from the other end.
2772 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. 2772 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
2773 */ 2773 */
2774 tp->tcp_header_len = sizeof(struct tcphdr) + 2774 tp->tcp_header_len = sizeof(struct tcphdr) +
2775 (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); 2775 (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
2776 2776
2777 #ifdef CONFIG_TCP_MD5SIG 2777 #ifdef CONFIG_TCP_MD5SIG
2778 if (tp->af_specific->md5_lookup(sk, sk) != NULL) 2778 if (tp->af_specific->md5_lookup(sk, sk) != NULL)
2779 tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; 2779 tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
2780 #endif 2780 #endif
2781 2781
2782 /* If user gave his TCP_MAXSEG, record it to clamp */ 2782 /* If user gave his TCP_MAXSEG, record it to clamp */
2783 if (tp->rx_opt.user_mss) 2783 if (tp->rx_opt.user_mss)
2784 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss; 2784 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
2785 tp->max_window = 0; 2785 tp->max_window = 0;
2786 tcp_mtup_init(sk); 2786 tcp_mtup_init(sk);
2787 tcp_sync_mss(sk, dst_mtu(dst)); 2787 tcp_sync_mss(sk, dst_mtu(dst));
2788 2788
2789 if (!tp->window_clamp) 2789 if (!tp->window_clamp)
2790 tp->window_clamp = dst_metric(dst, RTAX_WINDOW); 2790 tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
2791 tp->advmss = dst_metric_advmss(dst); 2791 tp->advmss = dst_metric_advmss(dst);
2792 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss) 2792 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
2793 tp->advmss = tp->rx_opt.user_mss; 2793 tp->advmss = tp->rx_opt.user_mss;
2794 2794
2795 tcp_initialize_rcv_mss(sk); 2795 tcp_initialize_rcv_mss(sk);
2796 2796
2797 /* limit the window selection if the user enforces a smaller rx buffer */ 2797 /* limit the window selection if the user enforces a smaller rx buffer */
2798 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && 2798 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
2799 (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0)) 2799 (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
2800 tp->window_clamp = tcp_full_space(sk); 2800 tp->window_clamp = tcp_full_space(sk);
2801 2801
2802 tcp_select_initial_window(tcp_full_space(sk), 2802 tcp_select_initial_window(tcp_full_space(sk),
2803 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), 2803 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
2804 &tp->rcv_wnd, 2804 &tp->rcv_wnd,
2805 &tp->window_clamp, 2805 &tp->window_clamp,
2806 sysctl_tcp_window_scaling, 2806 sysctl_tcp_window_scaling,
2807 &rcv_wscale, 2807 &rcv_wscale,
2808 dst_metric(dst, RTAX_INITRWND)); 2808 dst_metric(dst, RTAX_INITRWND));
2809 2809
2810 tp->rx_opt.rcv_wscale = rcv_wscale; 2810 tp->rx_opt.rcv_wscale = rcv_wscale;
2811 tp->rcv_ssthresh = tp->rcv_wnd; 2811 tp->rcv_ssthresh = tp->rcv_wnd;
2812 2812
2813 sk->sk_err = 0; 2813 sk->sk_err = 0;
2814 sock_reset_flag(sk, SOCK_DONE); 2814 sock_reset_flag(sk, SOCK_DONE);
2815 tp->snd_wnd = 0; 2815 tp->snd_wnd = 0;
2816 tcp_init_wl(tp, 0); 2816 tcp_init_wl(tp, 0);
2817 tp->snd_una = tp->write_seq; 2817 tp->snd_una = tp->write_seq;
2818 tp->snd_sml = tp->write_seq; 2818 tp->snd_sml = tp->write_seq;
2819 tp->snd_up = tp->write_seq; 2819 tp->snd_up = tp->write_seq;
2820 tp->snd_nxt = tp->write_seq; 2820 tp->snd_nxt = tp->write_seq;
2821 2821
2822 if (likely(!tp->repair)) 2822 if (likely(!tp->repair))
2823 tp->rcv_nxt = 0; 2823 tp->rcv_nxt = 0;
2824 else 2824 else
2825 tp->rcv_tstamp = tcp_time_stamp; 2825 tp->rcv_tstamp = tcp_time_stamp;
2826 tp->rcv_wup = tp->rcv_nxt; 2826 tp->rcv_wup = tp->rcv_nxt;
2827 tp->copied_seq = tp->rcv_nxt; 2827 tp->copied_seq = tp->rcv_nxt;
2828 2828
2829 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; 2829 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
2830 inet_csk(sk)->icsk_retransmits = 0; 2830 inet_csk(sk)->icsk_retransmits = 0;
2831 tcp_clear_retrans(tp); 2831 tcp_clear_retrans(tp);
2832 } 2832 }
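
For the header-length bookkeeping in tcp_connect_init() above, a rough userspace sketch: 20 bytes of base TCP header, plus 12 bytes when timestamps are enabled and 20 more when an MD5 key is configured (the aligned option sizes used by the kernel). The helper below is purely illustrative.

#include <stdio.h>

#define BASE_TCP_HDR	20	/* sizeof(struct tcphdr) */
#define TSTAMP_ALIGNED	12	/* TCPOLEN_TSTAMP_ALIGNED */
#define MD5SIG_ALIGNED	20	/* TCPOLEN_MD5SIG_ALIGNED */

static int syn_header_len(int timestamps, int md5)
{
	int len = BASE_TCP_HDR;

	if (timestamps)
		len += TSTAMP_ALIGNED;
	if (md5)
		len += MD5SIG_ALIGNED;
	return len;
}

int main(void)
{
	printf("%d\n", syn_header_len(1, 0));	/* 32 */
	printf("%d\n", syn_header_len(1, 1));	/* 52 */
	return 0;
}
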
2833 2833
2834 static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) 2834 static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
2835 { 2835 {
2836 struct tcp_sock *tp = tcp_sk(sk); 2836 struct tcp_sock *tp = tcp_sk(sk);
2837 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); 2837 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
2838 2838
2839 tcb->end_seq += skb->len; 2839 tcb->end_seq += skb->len;
2840 skb_header_release(skb); 2840 skb_header_release(skb);
2841 __tcp_add_write_queue_tail(sk, skb); 2841 __tcp_add_write_queue_tail(sk, skb);
2842 sk->sk_wmem_queued += skb->truesize; 2842 sk->sk_wmem_queued += skb->truesize;
2843 sk_mem_charge(sk, skb->truesize); 2843 sk_mem_charge(sk, skb->truesize);
2844 tp->write_seq = tcb->end_seq; 2844 tp->write_seq = tcb->end_seq;
2845 tp->packets_out += tcp_skb_pcount(skb); 2845 tp->packets_out += tcp_skb_pcount(skb);
2846 } 2846 }
2847 2847
2848 /* Build and send a SYN with data and (cached) Fast Open cookie. However, 2848 /* Build and send a SYN with data and (cached) Fast Open cookie. However,
2849 * queue a data-only packet after the regular SYN, such that regular SYNs 2849 * queue a data-only packet after the regular SYN, such that regular SYNs
2850 * are retransmitted on timeouts. Also if the remote SYN-ACK acknowledges 2850 * are retransmitted on timeouts. Also if the remote SYN-ACK acknowledges
2851 * only the SYN sequence, the data are retransmitted in the first ACK. 2851 * only the SYN sequence, the data are retransmitted in the first ACK.
2852 * If the cookie is not cached or another error occurs, fall back to sending a 2852 * If the cookie is not cached or another error occurs, fall back to sending a
2853 * regular SYN with a Fast Open cookie request option. 2853 * regular SYN with a Fast Open cookie request option.
2854 */ 2854 */
2855 static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) 2855 static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
2856 { 2856 {
2857 struct tcp_sock *tp = tcp_sk(sk); 2857 struct tcp_sock *tp = tcp_sk(sk);
2858 struct tcp_fastopen_request *fo = tp->fastopen_req; 2858 struct tcp_fastopen_request *fo = tp->fastopen_req;
2859 int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen; 2859 int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen;
2860 struct sk_buff *syn_data = NULL, *data; 2860 struct sk_buff *syn_data = NULL, *data;
2861 unsigned long last_syn_loss = 0; 2861 unsigned long last_syn_loss = 0;
2862 2862
2863 tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */ 2863 tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */
2864 tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie, 2864 tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
2865 &syn_loss, &last_syn_loss); 2865 &syn_loss, &last_syn_loss);
2866 /* Recurring FO SYN losses: revert to regular handshake temporarily */ 2866 /* Recurring FO SYN losses: revert to regular handshake temporarily */
2867 if (syn_loss > 1 && 2867 if (syn_loss > 1 &&
2868 time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) { 2868 time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
2869 fo->cookie.len = -1; 2869 fo->cookie.len = -1;
2870 goto fallback; 2870 goto fallback;
2871 } 2871 }
2872 2872
2873 if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) 2873 if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE)
2874 fo->cookie.len = -1; 2874 fo->cookie.len = -1;
2875 else if (fo->cookie.len <= 0) 2875 else if (fo->cookie.len <= 0)
2876 goto fallback; 2876 goto fallback;
2877 2877
2878 /* MSS for SYN-data is based on cached MSS and bounded by PMTU and 2878 /* MSS for SYN-data is based on cached MSS and bounded by PMTU and
2879 * user-MSS. Reserve maximum option space for middleboxes that add 2879 * user-MSS. Reserve maximum option space for middleboxes that add
2880 * private TCP options. The cost is reduced data space in SYN :( 2880 * private TCP options. The cost is reduced data space in SYN :(
2881 */ 2881 */
2882 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp) 2882 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp)
2883 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss; 2883 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
2884 space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - 2884 space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
2885 MAX_TCP_OPTION_SPACE; 2885 MAX_TCP_OPTION_SPACE;
2886 2886
2887 syn_data = skb_copy_expand(syn, skb_headroom(syn), space, 2887 syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
2888 sk->sk_allocation); 2888 sk->sk_allocation);
2889 if (syn_data == NULL) 2889 if (syn_data == NULL)
2890 goto fallback; 2890 goto fallback;
2891 2891
2892 for (i = 0; i < iovlen && syn_data->len < space; ++i) { 2892 for (i = 0; i < iovlen && syn_data->len < space; ++i) {
2893 struct iovec *iov = &fo->data->msg_iov[i]; 2893 struct iovec *iov = &fo->data->msg_iov[i];
2894 unsigned char __user *from = iov->iov_base; 2894 unsigned char __user *from = iov->iov_base;
2895 int len = iov->iov_len; 2895 int len = iov->iov_len;
2896 2896
2897 if (syn_data->len + len > space) 2897 if (syn_data->len + len > space)
2898 len = space - syn_data->len; 2898 len = space - syn_data->len;
2899 else if (i + 1 == iovlen) 2899 else if (i + 1 == iovlen)
2900 /* No more data pending in inet_wait_for_connect() */ 2900 /* No more data pending in inet_wait_for_connect() */
2901 fo->data = NULL; 2901 fo->data = NULL;
2902 2902
2903 if (skb_add_data(syn_data, from, len)) 2903 if (skb_add_data(syn_data, from, len))
2904 goto fallback; 2904 goto fallback;
2905 } 2905 }
2906 2906
2907 /* Queue a data-only packet after the regular SYN for retransmission */ 2907 /* Queue a data-only packet after the regular SYN for retransmission */
2908 data = pskb_copy(syn_data, sk->sk_allocation); 2908 data = pskb_copy(syn_data, sk->sk_allocation);
2909 if (data == NULL) 2909 if (data == NULL)
2910 goto fallback; 2910 goto fallback;
2911 TCP_SKB_CB(data)->seq++; 2911 TCP_SKB_CB(data)->seq++;
2912 TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN; 2912 TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN;
2913 TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH); 2913 TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH);
2914 tcp_connect_queue_skb(sk, data); 2914 tcp_connect_queue_skb(sk, data);
2915 fo->copied = data->len; 2915 fo->copied = data->len;
2916 2916
2917 if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) { 2917 if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
2918 tp->syn_data = (fo->copied > 0); 2918 tp->syn_data = (fo->copied > 0);
2919 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); 2919 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
2920 goto done; 2920 goto done;
2921 } 2921 }
2922 syn_data = NULL; 2922 syn_data = NULL;
2923 2923
2924 fallback: 2924 fallback:
2925 /* Send a regular SYN with Fast Open cookie request option */ 2925 /* Send a regular SYN with Fast Open cookie request option */
2926 if (fo->cookie.len > 0) 2926 if (fo->cookie.len > 0)
2927 fo->cookie.len = 0; 2927 fo->cookie.len = 0;
2928 err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation); 2928 err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
2929 if (err) 2929 if (err)
2930 tp->syn_fastopen = 0; 2930 tp->syn_fastopen = 0;
2931 kfree_skb(syn_data); 2931 kfree_skb(syn_data);
2932 done: 2932 done:
2933 fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */ 2933 fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */
2934 return err; 2934 return err;
2935 } 2935 }
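
A back-of-the-envelope sketch of the SYN-data sizing in tcp_send_syn_data(): the payload carried in the Fast Open SYN is limited to the MSS derived from the path MTU minus the full 40 bytes of TCP option space reserved for middleboxes. The MSS below is an assumed example, not a value read from the code.

#include <stdio.h>

#define MAX_TCP_OPTION_SPACE	40	/* worst-case TCP options */

int main(void)
{
	int mss = 1460;				/* assumed MSS for a 1500-byte MTU path */
	int space = mss - MAX_TCP_OPTION_SPACE;	/* payload room left in the SYN */

	printf("SYN can carry up to %d bytes of data\n", space); /* 1420 */
	return 0;
}
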
2936 2936
2937 /* Build a SYN and send it off. */ 2937 /* Build a SYN and send it off. */
2938 int tcp_connect(struct sock *sk) 2938 int tcp_connect(struct sock *sk)
2939 { 2939 {
2940 struct tcp_sock *tp = tcp_sk(sk); 2940 struct tcp_sock *tp = tcp_sk(sk);
2941 struct sk_buff *buff; 2941 struct sk_buff *buff;
2942 int err; 2942 int err;
2943 2943
2944 tcp_connect_init(sk); 2944 tcp_connect_init(sk);
2945 2945
2946 if (unlikely(tp->repair)) { 2946 if (unlikely(tp->repair)) {
2947 tcp_finish_connect(sk, NULL); 2947 tcp_finish_connect(sk, NULL);
2948 return 0; 2948 return 0;
2949 } 2949 }
2950 2950
2951 buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation); 2951 buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
2952 if (unlikely(buff == NULL)) 2952 if (unlikely(buff == NULL))
2953 return -ENOBUFS; 2953 return -ENOBUFS;
2954 2954
2955 /* Reserve space for headers. */ 2955 /* Reserve space for headers. */
2956 skb_reserve(buff, MAX_TCP_HEADER); 2956 skb_reserve(buff, MAX_TCP_HEADER);
2957 2957
2958 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); 2958 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
2959 tp->retrans_stamp = TCP_SKB_CB(buff)->when = tcp_time_stamp; 2959 tp->retrans_stamp = TCP_SKB_CB(buff)->when = tcp_time_stamp;
2960 tcp_connect_queue_skb(sk, buff); 2960 tcp_connect_queue_skb(sk, buff);
2961 TCP_ECN_send_syn(sk, buff); 2961 TCP_ECN_send_syn(sk, buff);
2962 2962
2963 /* Send off SYN; include data in Fast Open. */ 2963 /* Send off SYN; include data in Fast Open. */
2964 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) : 2964 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
2965 tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); 2965 tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
2966 if (err == -ECONNREFUSED) 2966 if (err == -ECONNREFUSED)
2967 return err; 2967 return err;
2968 2968
2969 /* We change tp->snd_nxt after the tcp_transmit_skb() call 2969 /* We change tp->snd_nxt after the tcp_transmit_skb() call
2970 * in order to make this packet get counted in tcpOutSegs. 2970 * in order to make this packet get counted in tcpOutSegs.
2971 */ 2971 */
2972 tp->snd_nxt = tp->write_seq; 2972 tp->snd_nxt = tp->write_seq;
2973 tp->pushed_seq = tp->write_seq; 2973 tp->pushed_seq = tp->write_seq;
2974 TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS); 2974 TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
2975 2975
2976 /* Timer for repeating the SYN until an answer. */ 2976 /* Timer for repeating the SYN until an answer. */
2977 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 2977 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2978 inet_csk(sk)->icsk_rto, TCP_RTO_MAX); 2978 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
2979 return 0; 2979 return 0;
2980 } 2980 }
2981 EXPORT_SYMBOL(tcp_connect); 2981 EXPORT_SYMBOL(tcp_connect);
2982 2982
2983 /* Send out a delayed ack, the caller does the policy checking 2983 /* Send out a delayed ack, the caller does the policy checking
2984 * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check() 2984 * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check()
2985 * for details. 2985 * for details.
2986 */ 2986 */
2987 void tcp_send_delayed_ack(struct sock *sk) 2987 void tcp_send_delayed_ack(struct sock *sk)
2988 { 2988 {
2989 struct inet_connection_sock *icsk = inet_csk(sk); 2989 struct inet_connection_sock *icsk = inet_csk(sk);
2990 int ato = icsk->icsk_ack.ato; 2990 int ato = icsk->icsk_ack.ato;
2991 unsigned long timeout; 2991 unsigned long timeout;
2992 2992
2993 if (ato > TCP_DELACK_MIN) { 2993 if (ato > TCP_DELACK_MIN) {
2994 const struct tcp_sock *tp = tcp_sk(sk); 2994 const struct tcp_sock *tp = tcp_sk(sk);
2995 int max_ato = HZ / 2; 2995 int max_ato = HZ / 2;
2996 2996
2997 if (icsk->icsk_ack.pingpong || 2997 if (icsk->icsk_ack.pingpong ||
2998 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)) 2998 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
2999 max_ato = TCP_DELACK_MAX; 2999 max_ato = TCP_DELACK_MAX;
3000 3000
3001 /* Slow path, intersegment interval is "high". */ 3001 /* Slow path, intersegment interval is "high". */
3002 3002
3003 /* If some rtt estimate is known, use it to bound delayed ack. 3003 /* If some rtt estimate is known, use it to bound delayed ack.
3004 * Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements 3004 * Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements
3005 * directly. 3005 * directly.
3006 */ 3006 */
3007 if (tp->srtt) { 3007 if (tp->srtt) {
3008 int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN); 3008 int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
3009 3009
3010 if (rtt < max_ato) 3010 if (rtt < max_ato)
3011 max_ato = rtt; 3011 max_ato = rtt;
3012 } 3012 }
3013 3013
3014 ato = min(ato, max_ato); 3014 ato = min(ato, max_ato);
3015 } 3015 }
3016 3016
3017 /* Stay within the limit we were given */ 3017 /* Stay within the limit we were given */
3018 timeout = jiffies + ato; 3018 timeout = jiffies + ato;
3019 3019
3020 /* Use new timeout only if there wasn't an older one earlier. */ 3020 /* Use new timeout only if there wasn't an older one earlier. */
3021 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) { 3021 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
3022 /* If delack timer was blocked or is about to expire, 3022 /* If delack timer was blocked or is about to expire,
3023 * send ACK now. 3023 * send ACK now.
3024 */ 3024 */
3025 if (icsk->icsk_ack.blocked || 3025 if (icsk->icsk_ack.blocked ||
3026 time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) { 3026 time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
3027 tcp_send_ack(sk); 3027 tcp_send_ack(sk);
3028 return; 3028 return;
3029 } 3029 }
3030 3030
3031 if (!time_before(timeout, icsk->icsk_ack.timeout)) 3031 if (!time_before(timeout, icsk->icsk_ack.timeout))
3032 timeout = icsk->icsk_ack.timeout; 3032 timeout = icsk->icsk_ack.timeout;
3033 } 3033 }
3034 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; 3034 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
3035 icsk->icsk_ack.timeout = timeout; 3035 icsk->icsk_ack.timeout = timeout;
3036 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); 3036 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
3037 } 3037 }
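
A simplified, millisecond-based sketch of the delayed-ACK clamping in tcp_send_delayed_ack(): the ack timeout never exceeds half a second (or the delayed-ACK maximum in pingpong mode), and when an RTT estimate exists it is further bounded by that estimate, never dropping below the delayed-ACK minimum. The millisecond constants are rough translations of the kernel values; the inputs are illustrative.

#include <stdio.h>

#define DELACK_MIN_MS	40	/* roughly TCP_DELACK_MIN */
#define DELACK_MAX_MS	200	/* roughly TCP_DELACK_MAX */

static int delayed_ack_ms(int ato_ms, int srtt_ms, int pingpong)
{
	int max_ato = pingpong ? DELACK_MAX_MS : 500;	/* HZ / 2 */

	if (srtt_ms > 0) {
		int rtt = srtt_ms > DELACK_MIN_MS ? srtt_ms : DELACK_MIN_MS;

		if (rtt < max_ato)
			max_ato = rtt;
	}
	return ato_ms < max_ato ? ato_ms : max_ato;
}

int main(void)
{
	printf("%d\n", delayed_ack_ms(300, 80, 0));	/* bounded by the 80 ms RTT */
	printf("%d\n", delayed_ack_ms(300, 0, 1));	/* bounded by DELACK_MAX_MS */
	return 0;
}
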
3038 3038
3039 /* This routine sends an ack and also updates the window. */ 3039 /* This routine sends an ack and also updates the window. */
3040 void tcp_send_ack(struct sock *sk) 3040 void tcp_send_ack(struct sock *sk)
3041 { 3041 {
3042 struct sk_buff *buff; 3042 struct sk_buff *buff;
3043 3043
3044 /* If we have been reset, we may not send again. */ 3044 /* If we have been reset, we may not send again. */
3045 if (sk->sk_state == TCP_CLOSE) 3045 if (sk->sk_state == TCP_CLOSE)
3046 return; 3046 return;
3047 3047
3048 /* We are not putting this on the write queue, so 3048 /* We are not putting this on the write queue, so
3049 * tcp_transmit_skb() will set the ownership to this 3049 * tcp_transmit_skb() will set the ownership to this
3050 * sock. 3050 * sock.
3051 */ 3051 */
3052 buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); 3052 buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
3053 if (buff == NULL) { 3053 if (buff == NULL) {
3054 inet_csk_schedule_ack(sk); 3054 inet_csk_schedule_ack(sk);
3055 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; 3055 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
3056 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 3056 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
3057 TCP_DELACK_MAX, TCP_RTO_MAX); 3057 TCP_DELACK_MAX, TCP_RTO_MAX);
3058 return; 3058 return;
3059 } 3059 }
3060 3060
3061 /* Reserve space for headers and prepare control bits. */ 3061 /* Reserve space for headers and prepare control bits. */
3062 skb_reserve(buff, MAX_TCP_HEADER); 3062 skb_reserve(buff, MAX_TCP_HEADER);
3063 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); 3063 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
3064 3064
3065 /* Send it off, this clears delayed acks for us. */ 3065 /* Send it off, this clears delayed acks for us. */
3066 TCP_SKB_CB(buff)->when = tcp_time_stamp; 3066 TCP_SKB_CB(buff)->when = tcp_time_stamp;
3067 tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); 3067 tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
3068 } 3068 }
3069 3069
3070 /* This routine sends a packet with an out of date sequence 3070 /* This routine sends a packet with an out of date sequence
3071 * number. It assumes the other end will try to ack it. 3071 * number. It assumes the other end will try to ack it.
3072 * 3072 *
3073 * Question: what should we send while in urgent mode? 3073 * Question: what should we send while in urgent mode?
3074 * 4.4BSD forces sending single byte of data. We cannot send 3074 * 4.4BSD forces sending single byte of data. We cannot send
3075 * out of window data, because we have SND.NXT==SND.MAX... 3075 * out of window data, because we have SND.NXT==SND.MAX...
3076 * 3076 *
3077 * Current solution: to send TWO zero-length segments in urgent mode: 3077 * Current solution: to send TWO zero-length segments in urgent mode:
3078 * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is 3078 * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
3079 * out-of-date with SND.UNA-1 to probe window. 3079 * out-of-date with SND.UNA-1 to probe window.
3080 */ 3080 */
3081 static int tcp_xmit_probe_skb(struct sock *sk, int urgent) 3081 static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
3082 { 3082 {
3083 struct tcp_sock *tp = tcp_sk(sk); 3083 struct tcp_sock *tp = tcp_sk(sk);
3084 struct sk_buff *skb; 3084 struct sk_buff *skb;
3085 3085
3086 /* We don't queue it, tcp_transmit_skb() sets ownership. */ 3086 /* We don't queue it, tcp_transmit_skb() sets ownership. */
3087 skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); 3087 skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
3088 if (skb == NULL) 3088 if (skb == NULL)
3089 return -1; 3089 return -1;
3090 3090
3091 /* Reserve space for headers and set control bits. */ 3091 /* Reserve space for headers and set control bits. */
3092 skb_reserve(skb, MAX_TCP_HEADER); 3092 skb_reserve(skb, MAX_TCP_HEADER);
3093 /* Use a previous sequence. This should cause the other 3093 /* Use a previous sequence. This should cause the other
3094 * end to send an ack. Don't queue or clone SKB, just 3094 * end to send an ack. Don't queue or clone SKB, just
3095 * send it. 3095 * send it.
3096 */ 3096 */
3097 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); 3097 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
3098 TCP_SKB_CB(skb)->when = tcp_time_stamp; 3098 TCP_SKB_CB(skb)->when = tcp_time_stamp;
3099 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); 3099 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
3100 } 3100 }
3101 3101
3102 void tcp_send_window_probe(struct sock *sk) 3102 void tcp_send_window_probe(struct sock *sk)
3103 { 3103 {
3104 if (sk->sk_state == TCP_ESTABLISHED) { 3104 if (sk->sk_state == TCP_ESTABLISHED) {
3105 tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; 3105 tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
3106 tcp_xmit_probe_skb(sk, 0); 3106 tcp_xmit_probe_skb(sk, 0);
3107 } 3107 }
3108 } 3108 }
3109 3109
3110 /* Initiate keepalive or window probe from timer. */ 3110 /* Initiate keepalive or window probe from timer. */
3111 int tcp_write_wakeup(struct sock *sk) 3111 int tcp_write_wakeup(struct sock *sk)
3112 { 3112 {
3113 struct tcp_sock *tp = tcp_sk(sk); 3113 struct tcp_sock *tp = tcp_sk(sk);
3114 struct sk_buff *skb; 3114 struct sk_buff *skb;
3115 3115
3116 if (sk->sk_state == TCP_CLOSE) 3116 if (sk->sk_state == TCP_CLOSE)
3117 return -1; 3117 return -1;
3118 3118
3119 if ((skb = tcp_send_head(sk)) != NULL && 3119 if ((skb = tcp_send_head(sk)) != NULL &&
3120 before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) { 3120 before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
3121 int err; 3121 int err;
3122 unsigned int mss = tcp_current_mss(sk); 3122 unsigned int mss = tcp_current_mss(sk);
3123 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; 3123 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
3124 3124
3125 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq)) 3125 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
3126 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq; 3126 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
3127 3127
3128 /* We are probing the opening of a window 3128 /* We are probing the opening of a window
3129 * but the window size is != 0; this must have been 3129 * but the window size is != 0; this must have been
3130 * the result of SWS avoidance (sender side). 3130 * the result of SWS avoidance (sender side).
3131 */ 3131 */
3132 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq || 3132 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
3133 skb->len > mss) { 3133 skb->len > mss) {
3134 seg_size = min(seg_size, mss); 3134 seg_size = min(seg_size, mss);
3135 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; 3135 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3136 if (tcp_fragment(sk, skb, seg_size, mss)) 3136 if (tcp_fragment(sk, skb, seg_size, mss))
3137 return -1; 3137 return -1;
3138 } else if (!tcp_skb_pcount(skb)) 3138 } else if (!tcp_skb_pcount(skb))
3139 tcp_set_skb_tso_segs(sk, skb, mss); 3139 tcp_set_skb_tso_segs(sk, skb, mss);
3140 3140
3141 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; 3141 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3142 TCP_SKB_CB(skb)->when = tcp_time_stamp; 3142 TCP_SKB_CB(skb)->when = tcp_time_stamp;
3143 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 3143 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
3144 if (!err) 3144 if (!err)
3145 tcp_event_new_data_sent(sk, skb); 3145 tcp_event_new_data_sent(sk, skb);
3146 return err; 3146 return err;
3147 } else { 3147 } else {
3148 if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF)) 3148 if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
3149 tcp_xmit_probe_skb(sk, 1); 3149 tcp_xmit_probe_skb(sk, 1);
3150 return tcp_xmit_probe_skb(sk, 0); 3150 return tcp_xmit_probe_skb(sk, 0);
3151 } 3151 }
3152 } 3152 }
3153 3153
3154 /* A window probe timeout has occurred. If the window is not closed, send 3154 /* A window probe timeout has occurred. If the window is not closed, send
3155 * a partial packet, else send a zero-window probe. 3155 * a partial packet, else send a zero-window probe.
3156 */ 3156 */
3157 void tcp_send_probe0(struct sock *sk) 3157 void tcp_send_probe0(struct sock *sk)
3158 { 3158 {
3159 struct inet_connection_sock *icsk = inet_csk(sk); 3159 struct inet_connection_sock *icsk = inet_csk(sk);
3160 struct tcp_sock *tp = tcp_sk(sk); 3160 struct tcp_sock *tp = tcp_sk(sk);
3161 int err; 3161 int err;
3162 3162
3163 err = tcp_write_wakeup(sk); 3163 err = tcp_write_wakeup(sk);
3164 3164
3165 if (tp->packets_out || !tcp_send_head(sk)) { 3165 if (tp->packets_out || !tcp_send_head(sk)) {
3166 /* Cancel probe timer, if it is not required. */ 3166 /* Cancel probe timer, if it is not required. */
3167 icsk->icsk_probes_out = 0; 3167 icsk->icsk_probes_out = 0;
3168 icsk->icsk_backoff = 0; 3168 icsk->icsk_backoff = 0;
3169 return; 3169 return;
3170 } 3170 }
3171 3171
3172 if (err <= 0) { 3172 if (err <= 0) {
3173 if (icsk->icsk_backoff < sysctl_tcp_retries2) 3173 if (icsk->icsk_backoff < sysctl_tcp_retries2)
3174 icsk->icsk_backoff++; 3174 icsk->icsk_backoff++;
3175 icsk->icsk_probes_out++; 3175 icsk->icsk_probes_out++;
3176 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, 3176 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3177 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX), 3177 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
3178 TCP_RTO_MAX); 3178 TCP_RTO_MAX);
3179 } else { 3179 } else {
3180 /* If packet was not sent due to local congestion, 3180 /* If packet was not sent due to local congestion,
3181 * do not back off and do not remember icsk_probes_out. 3181 * do not back off and do not remember icsk_probes_out.
3182 * Let local senders fight for local resources. 3182 * Let local senders fight for local resources.
3183 * 3183 *
3184 * Still use the accumulated backoff, though. 3184 * Still use the accumulated backoff, though.
3185 */ 3185 */
3186 if (!icsk->icsk_probes_out) 3186 if (!icsk->icsk_probes_out)
3187 icsk->icsk_probes_out = 1; 3187 icsk->icsk_probes_out = 1;
3188 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, 3188 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3189 min(icsk->icsk_rto << icsk->icsk_backoff, 3189 min(icsk->icsk_rto << icsk->icsk_backoff,
3190 TCP_RESOURCE_PROBE_INTERVAL), 3190 TCP_RESOURCE_PROBE_INTERVAL),
3191 TCP_RTO_MAX); 3191 TCP_RTO_MAX);
3192 } 3192 }
3193 } 3193 }
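
Finally, an illustrative sketch of the zero-window-probe backoff in tcp_send_probe0(): each unanswered probe doubles the timeout (icsk_rto shifted left by icsk_backoff) until the value is capped at TCP_RTO_MAX, i.e. 120 seconds. The starting RTO here is an assumed figure.

#include <stdio.h>

#define RTO_MAX_MS	(120 * 1000)	/* TCP_RTO_MAX expressed in ms */

int main(void)
{
	long rto_ms = 200;	/* assumed current retransmission timeout */
	int backoff;

	for (backoff = 0; backoff < 12; backoff++) {
		long t = rto_ms << backoff;

		if (t > RTO_MAX_MS)
			t = RTO_MAX_MS;
		printf("probe %2d: %6ld ms\n", backoff, t);
	}
	return 0;
}
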
3194 3194