Commit a36b38aa2af61146ea80980a01cf6e952ab021c1
Committed by
Daniel Borkmann
1 parent
50e74c0131
xsk: add sock_diag interface for AF_XDP
This patch adds the sock_diag interface for querying sockets from user space. Tools like iproute2 ss(8) can use this interface to list open AF_XDP sockets. The user-space ABI is defined in linux/xdp_diag.h and includes netlink request and response structs. The request can query sockets and the response contains socket information about the rings, umems, inode and more. Signed-off-by: Björn Töpel <bjorn.topel@intel.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Showing 6 changed files with 285 additions and 5 deletions Side-by-side Diff
include/uapi/linux/xdp_diag.h
1 | +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ | |
2 | +/* | |
3 | + * xdp_diag: interface for query/monitor XDP sockets | |
4 | + * Copyright(c) 2019 Intel Corporation. | |
5 | + */ | |
6 | + | |
7 | +#ifndef _LINUX_XDP_DIAG_H | |
8 | +#define _LINUX_XDP_DIAG_H | |
9 | + | |
10 | +#include <linux/types.h> | |
11 | + | |
12 | +struct xdp_diag_req { | |
13 | + __u8 sdiag_family; | |
14 | + __u8 sdiag_protocol; | |
15 | + __u16 pad; | |
16 | + __u32 xdiag_ino; | |
17 | + __u32 xdiag_show; | |
18 | + __u32 xdiag_cookie[2]; | |
19 | +}; | |
20 | + | |
21 | +struct xdp_diag_msg { | |
22 | + __u8 xdiag_family; | |
23 | + __u8 xdiag_type; | |
24 | + __u16 pad; | |
25 | + __u32 xdiag_ino; | |
26 | + __u32 xdiag_cookie[2]; | |
27 | +}; | |
28 | + | |
/* Bits for xdp_diag_req.xdiag_show: which attribute groups the reply carries. */
#define XDP_SHOW_INFO		(1 << 0) /* Basic information: XDP_DIAG_INFO, XDP_DIAG_UID */
#define XDP_SHOW_RING_CFG	(1 << 1) /* XDP_DIAG_RX_RING, XDP_DIAG_TX_RING */
#define XDP_SHOW_UMEM		(1 << 2) /* XDP_DIAG_UMEM and the umem ring attributes */
#define XDP_SHOW_MEMINFO	(1 << 3) /* XDP_DIAG_MEMINFO */
33 | + | |
/*
 * Netlink attribute types used in replies. The numeric values are
 * user-space ABI; new entries may only be added before __XDP_DIAG_MAX.
 */
enum {
	XDP_DIAG_NONE			= 0,
	XDP_DIAG_INFO			= 1,	/* struct xdp_diag_info */
	XDP_DIAG_UID			= 2,	/* u32 uid of the socket owner */
	XDP_DIAG_RX_RING		= 3,	/* struct xdp_diag_ring */
	XDP_DIAG_TX_RING		= 4,	/* struct xdp_diag_ring */
	XDP_DIAG_UMEM			= 5,	/* struct xdp_diag_umem */
	XDP_DIAG_UMEM_FILL_RING		= 6,	/* struct xdp_diag_ring */
	XDP_DIAG_UMEM_COMPLETION_RING	= 7,	/* struct xdp_diag_ring */
	XDP_DIAG_MEMINFO		= 8,
	__XDP_DIAG_MAX,
};

/* Highest valid attribute type. */
#define XDP_DIAG_MAX (__XDP_DIAG_MAX - 1)
48 | + | |
49 | +struct xdp_diag_info { | |
50 | + __u32 ifindex; | |
51 | + __u32 queue_id; | |
52 | +}; | |
53 | + | |
54 | +struct xdp_diag_ring { | |
55 | + __u32 entries; /*num descs */ | |
56 | +}; | |
57 | + | |
58 | +#define XDP_DU_F_ZEROCOPY (1 << 0) | |
59 | + | |
60 | +struct xdp_diag_umem { | |
61 | + __u64 size; | |
62 | + __u32 id; | |
63 | + __u32 num_pages; | |
64 | + __u32 chunk_size; | |
65 | + __u32 headroom; | |
66 | + __u32 ifindex; | |
67 | + __u32 queue_id; | |
68 | + __u32 flags; | |
69 | + __u32 refs; | |
70 | +}; | |
71 | + | |
72 | +#endif /* _LINUX_XDP_DIAG_H */ |
net/xdp/Kconfig
... | ... | @@ -5,4 +5,12 @@ |
5 | 5 | help |
6 | 6 | XDP sockets allows a channel between XDP programs and |
7 | 7 | userspace applications. |
8 | + | |
9 | +config XDP_SOCKETS_DIAG | |
10 | + tristate "XDP sockets: monitoring interface" | |
11 | + depends on XDP_SOCKETS | |
12 | + default n | |
13 | + help | |
14 | + Support for PF_XDP sockets monitoring interface used by the ss tool. | |
15 | + If unsure, say Y. |
net/xdp/Makefile
net/xdp/xsk.c
... | ... | @@ -27,13 +27,9 @@ |
27 | 27 | |
28 | 28 | #include "xsk_queue.h" |
29 | 29 | #include "xdp_umem.h" |
30 | +#include "xsk.h" | |
30 | 31 | |
31 | 32 | #define TX_BATCH_SIZE 16 |
32 | - | |
33 | -static struct xdp_sock *xdp_sk(struct sock *sk) | |
34 | -{ | |
35 | - return (struct xdp_sock *)sk; | |
36 | -} | |
37 | 33 | |
38 | 34 | bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) |
39 | 35 | { |
net/xdp/xsk.h
net/xdp/xsk_diag.c
1 | +// SPDX-License-Identifier: GPL-2.0 | |
2 | +/* XDP sockets monitoring support | |
3 | + * | |
4 | + * Copyright(c) 2019 Intel Corporation. | |
5 | + * | |
6 | + * Author: Björn Töpel <bjorn.topel@intel.com> | |
7 | + */ | |
8 | + | |
9 | +#include <linux/module.h> | |
10 | +#include <net/xdp_sock.h> | |
11 | +#include <linux/xdp_diag.h> | |
12 | +#include <linux/sock_diag.h> | |
13 | + | |
14 | +#include "xsk_queue.h" | |
15 | +#include "xsk.h" | |
16 | + | |
17 | +static int xsk_diag_put_info(const struct xdp_sock *xs, struct sk_buff *nlskb) | |
18 | +{ | |
19 | + struct xdp_diag_info di = {}; | |
20 | + | |
21 | + di.ifindex = xs->dev ? xs->dev->ifindex : 0; | |
22 | + di.queue_id = xs->queue_id; | |
23 | + return nla_put(nlskb, XDP_DIAG_INFO, sizeof(di), &di); | |
24 | +} | |
25 | + | |
26 | +static int xsk_diag_put_ring(const struct xsk_queue *queue, int nl_type, | |
27 | + struct sk_buff *nlskb) | |
28 | +{ | |
29 | + struct xdp_diag_ring dr = {}; | |
30 | + | |
31 | + dr.entries = queue->nentries; | |
32 | + return nla_put(nlskb, nl_type, sizeof(dr), &dr); | |
33 | +} | |
34 | + | |
35 | +static int xsk_diag_put_rings_cfg(const struct xdp_sock *xs, | |
36 | + struct sk_buff *nlskb) | |
37 | +{ | |
38 | + int err = 0; | |
39 | + | |
40 | + if (xs->rx) | |
41 | + err = xsk_diag_put_ring(xs->rx, XDP_DIAG_RX_RING, nlskb); | |
42 | + if (!err && xs->tx) | |
43 | + err = xsk_diag_put_ring(xs->tx, XDP_DIAG_TX_RING, nlskb); | |
44 | + return err; | |
45 | +} | |
46 | + | |
47 | +static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb) | |
48 | +{ | |
49 | + struct xdp_umem *umem = xs->umem; | |
50 | + struct xdp_diag_umem du = {}; | |
51 | + int err; | |
52 | + | |
53 | + if (!umem) | |
54 | + return 0; | |
55 | + | |
56 | + du.id = umem->id; | |
57 | + du.size = umem->size; | |
58 | + du.num_pages = umem->npgs; | |
59 | + du.chunk_size = (__u32)(~umem->chunk_mask + 1); | |
60 | + du.headroom = umem->headroom; | |
61 | + du.ifindex = umem->dev ? umem->dev->ifindex : 0; | |
62 | + du.queue_id = umem->queue_id; | |
63 | + du.flags = 0; | |
64 | + if (umem->zc) | |
65 | + du.flags |= XDP_DU_F_ZEROCOPY; | |
66 | + du.refs = refcount_read(&umem->users); | |
67 | + | |
68 | + err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du); | |
69 | + | |
70 | + if (!err && umem->fq) | |
71 | + err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb); | |
72 | + if (!err && umem->cq) { | |
73 | + err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING, | |
74 | + nlskb); | |
75 | + } | |
76 | + return err; | |
77 | +} | |
78 | + | |
79 | +static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb, | |
80 | + struct xdp_diag_req *req, | |
81 | + struct user_namespace *user_ns, | |
82 | + u32 portid, u32 seq, u32 flags, int sk_ino) | |
83 | +{ | |
84 | + struct xdp_sock *xs = xdp_sk(sk); | |
85 | + struct xdp_diag_msg *msg; | |
86 | + struct nlmsghdr *nlh; | |
87 | + | |
88 | + nlh = nlmsg_put(nlskb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*msg), | |
89 | + flags); | |
90 | + if (!nlh) | |
91 | + return -EMSGSIZE; | |
92 | + | |
93 | + msg = nlmsg_data(nlh); | |
94 | + memset(msg, 0, sizeof(*msg)); | |
95 | + msg->xdiag_family = AF_XDP; | |
96 | + msg->xdiag_type = sk->sk_type; | |
97 | + msg->xdiag_ino = sk_ino; | |
98 | + sock_diag_save_cookie(sk, msg->xdiag_cookie); | |
99 | + | |
100 | + if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb)) | |
101 | + goto out_nlmsg_trim; | |
102 | + | |
103 | + if ((req->xdiag_show & XDP_SHOW_INFO) && | |
104 | + nla_put_u32(nlskb, XDP_DIAG_UID, | |
105 | + from_kuid_munged(user_ns, sock_i_uid(sk)))) | |
106 | + goto out_nlmsg_trim; | |
107 | + | |
108 | + if ((req->xdiag_show & XDP_SHOW_RING_CFG) && | |
109 | + xsk_diag_put_rings_cfg(xs, nlskb)) | |
110 | + goto out_nlmsg_trim; | |
111 | + | |
112 | + if ((req->xdiag_show & XDP_SHOW_UMEM) && | |
113 | + xsk_diag_put_umem(xs, nlskb)) | |
114 | + goto out_nlmsg_trim; | |
115 | + | |
116 | + if ((req->xdiag_show & XDP_SHOW_MEMINFO) && | |
117 | + sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO)) | |
118 | + goto out_nlmsg_trim; | |
119 | + | |
120 | + nlmsg_end(nlskb, nlh); | |
121 | + return 0; | |
122 | + | |
123 | +out_nlmsg_trim: | |
124 | + nlmsg_cancel(nlskb, nlh); | |
125 | + return -EMSGSIZE; | |
126 | +} | |
127 | + | |
128 | +static int xsk_diag_dump(struct sk_buff *nlskb, struct netlink_callback *cb) | |
129 | +{ | |
130 | + struct xdp_diag_req *req = nlmsg_data(cb->nlh); | |
131 | + struct net *net = sock_net(nlskb->sk); | |
132 | + int num = 0, s_num = cb->args[0]; | |
133 | + struct sock *sk; | |
134 | + | |
135 | + mutex_lock(&net->xdp.lock); | |
136 | + | |
137 | + sk_for_each(sk, &net->xdp.list) { | |
138 | + if (!net_eq(sock_net(sk), net)) | |
139 | + continue; | |
140 | + if (num++ < s_num) | |
141 | + continue; | |
142 | + | |
143 | + if (xsk_diag_fill(sk, nlskb, req, | |
144 | + sk_user_ns(NETLINK_CB(cb->skb).sk), | |
145 | + NETLINK_CB(cb->skb).portid, | |
146 | + cb->nlh->nlmsg_seq, NLM_F_MULTI, | |
147 | + sock_i_ino(sk)) < 0) { | |
148 | + num--; | |
149 | + break; | |
150 | + } | |
151 | + } | |
152 | + | |
153 | + mutex_unlock(&net->xdp.lock); | |
154 | + cb->args[0] = num; | |
155 | + return nlskb->len; | |
156 | +} | |
157 | + | |
158 | +static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr) | |
159 | +{ | |
160 | + struct netlink_dump_control c = { .dump = xsk_diag_dump }; | |
161 | + int hdrlen = sizeof(struct xdp_diag_req); | |
162 | + struct net *net = sock_net(nlskb->sk); | |
163 | + | |
164 | + if (nlmsg_len(hdr) < hdrlen) | |
165 | + return -EINVAL; | |
166 | + | |
167 | + if (!(hdr->nlmsg_flags & NLM_F_DUMP)) | |
168 | + return -EOPNOTSUPP; | |
169 | + | |
170 | + return netlink_dump_start(net->diag_nlsk, nlskb, hdr, &c); | |
171 | +} | |
172 | + | |
173 | +static const struct sock_diag_handler xsk_diag_handler = { | |
174 | + .family = AF_XDP, | |
175 | + .dump = xsk_diag_handler_dump, | |
176 | +}; | |
177 | + | |
178 | +static int __init xsk_diag_init(void) | |
179 | +{ | |
180 | + return sock_diag_register(&xsk_diag_handler); | |
181 | +} | |
182 | + | |
183 | +static void __exit xsk_diag_exit(void) | |
184 | +{ | |
185 | + sock_diag_unregister(&xsk_diag_handler); | |
186 | +} | |
187 | + | |
188 | +module_init(xsk_diag_init); | |
189 | +module_exit(xsk_diag_exit); | |
190 | +MODULE_LICENSE("GPL"); | |
191 | +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP); |