Commit a36b38aa2af61146ea80980a01cf6e952ab021c1

Authored by Björn Töpel
Committed by Daniel Borkmann
1 parent 50e74c0131

xsk: add sock_diag interface for AF_XDP

This patch adds the sock_diag interface for querying sockets from user
space. Tools like iproute2 ss(8) can use this interface to list open
AF_XDP sockets.

The user-space ABI is defined in linux/xdp_diag.h and includes netlink
request and response structs. The request can query sockets and the
response contains socket information about the rings, umems, inode and
more.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>

Showing 6 changed files with 285 additions and 5 deletions Side-by-side Diff

include/uapi/linux/xdp_diag.h
  1 +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
  2 +/*
  3 + * xdp_diag: interface for query/monitor XDP sockets
  4 + * Copyright(c) 2019 Intel Corporation.
  5 + */
  6 +
  7 +#ifndef _LINUX_XDP_DIAG_H
  8 +#define _LINUX_XDP_DIAG_H
  9 +
  10 +#include <linux/types.h>
  11 +
/*
 * Request payload sent by user space for an AF_XDP sock_diag query.
 *
 * xdiag_show is a bitmask of XDP_SHOW_* flags; each set bit asks the kernel
 * to append the corresponding optional attribute(s) to every reply message.
 */
struct xdp_diag_req {
	__u8	sdiag_family;
	__u8	sdiag_protocol;
	__u16	pad;
	__u32	xdiag_ino;
	__u32	xdiag_show;		/* bitmask of XDP_SHOW_* */
	__u32	xdiag_cookie[2];
};
  20 +
/*
 * Fixed header of every reply message describing one AF_XDP socket.
 * Optional XDP_DIAG_* netlink attributes follow this header.
 */
struct xdp_diag_msg {
	__u8	xdiag_family;		/* set to AF_XDP by the kernel */
	__u8	xdiag_type;		/* socket type (sk_type) */
	__u16	pad;			/* zeroed by the kernel */
	__u32	xdiag_ino;		/* inode number of the socket */
	__u32	xdiag_cookie[2];	/* socket cookie */
};
  28 +
/* Bits for xdp_diag_req.xdiag_show: which attributes to include in a reply. */

/* Basic information: XDP_DIAG_INFO and XDP_DIAG_UID */
#define XDP_SHOW_INFO		(1 << 0)
/* RX/TX ring configuration: XDP_DIAG_RX_RING and XDP_DIAG_TX_RING */
#define XDP_SHOW_RING_CFG	(1 << 1)
/* Umem description and its rings: XDP_DIAG_UMEM, XDP_DIAG_UMEM_*_RING */
#define XDP_SHOW_UMEM		(1 << 2)
/* Socket memory information: XDP_DIAG_MEMINFO */
#define XDP_SHOW_MEMINFO	(1 << 3)
  33 +
/* Netlink attribute types that may follow struct xdp_diag_msg in a reply. */
enum {
	XDP_DIAG_NONE,
	XDP_DIAG_INFO,			/* struct xdp_diag_info */
	XDP_DIAG_UID,			/* u32 uid of the socket owner */
	XDP_DIAG_RX_RING,		/* struct xdp_diag_ring */
	XDP_DIAG_TX_RING,		/* struct xdp_diag_ring */
	XDP_DIAG_UMEM,			/* struct xdp_diag_umem */
	XDP_DIAG_UMEM_FILL_RING,	/* struct xdp_diag_ring */
	XDP_DIAG_UMEM_COMPLETION_RING,	/* struct xdp_diag_ring */
	XDP_DIAG_MEMINFO,		/* socket memory information */
	__XDP_DIAG_MAX,			/* sentinel, keep last */
};

/* Highest valid attribute type. */
#define XDP_DIAG_MAX (__XDP_DIAG_MAX - 1)
  48 +
/* Payload of the XDP_DIAG_INFO attribute. */
struct xdp_diag_info {
	__u32	ifindex;	/* device the socket is attached to, 0 if none */
	__u32	queue_id;	/* queue id of the socket */
};
  53 +
/* Payload of the ring attributes (RX, TX, umem fill, umem completion). */
struct xdp_diag_ring {
	__u32	entries;	/* number of descriptors in the ring */
};
  57 +
/* Bit for xdp_diag_umem.flags: the umem operates in zero-copy mode. */
#define XDP_DU_F_ZEROCOPY (1 << 0)

/* Payload of the XDP_DIAG_UMEM attribute. */
struct xdp_diag_umem {
	__u64	size;		/* size of the umem */
	__u32	id;		/* umem id */
	__u32	num_pages;	/* number of pages backing the umem */
	__u32	chunk_size;	/* size of one chunk */
	__u32	headroom;	/* headroom configured for the umem */
	__u32	ifindex;	/* device the umem is bound to, 0 if none */
	__u32	queue_id;	/* queue id the umem is bound to */
	__u32	flags;		/* XDP_DU_F_* */
	__u32	refs;		/* current user reference count */
};
  71 +
  72 +#endif /* _LINUX_XDP_DIAG_H */
... ... @@ -5,4 +5,12 @@
5 5 help
6 6 XDP sockets allow a channel between XDP programs and
7 7 userspace applications.
  8 +
  9 +config XDP_SOCKETS_DIAG
  10 + tristate "XDP sockets: monitoring interface"
  11 + depends on XDP_SOCKETS
  12 + default n
  13 + help
  14 + Support for PF_XDP sockets monitoring interface used by the ss tool.
  15 + If unsure, say Y.
1 1 obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
  2 +obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o
... ... @@ -27,13 +27,9 @@
27 27  
28 28 #include "xsk_queue.h"
29 29 #include "xdp_umem.h"
  30 +#include "xsk.h"
30 31  
31 32 #define TX_BATCH_SIZE 16
32   -
33   -static struct xdp_sock *xdp_sk(struct sock *sk)
34   -{
35   - return (struct xdp_sock *)sk;
36   -}
37 33  
38 34 bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
39 35 {
  1 +/* SPDX-License-Identifier: GPL-2.0 */
  2 +/* Copyright(c) 2019 Intel Corporation. */
  3 +
  4 +#ifndef XSK_H_
  5 +#define XSK_H_
  6 +
/*
 * Convert a generic socket pointer into its AF_XDP socket representation.
 *
 * This is a plain pointer cast; the returned pointer has the same address
 * as @sk.
 * NOTE(review): presumably relies on struct xdp_sock embedding struct sock
 * as its first member - confirm against the struct definition.
 */
static inline struct xdp_sock *xdp_sk(struct sock *sk)
{
	struct xdp_sock *xs = (struct xdp_sock *)sk;

	return xs;
}
  11 +
  12 +#endif /* XSK_H_ */
  1 +// SPDX-License-Identifier: GPL-2.0
  2 +/* XDP sockets monitoring support
  3 + *
  4 + * Copyright(c) 2019 Intel Corporation.
  5 + *
  6 + * Author: Björn Töpel <bjorn.topel@intel.com>
  7 + */
  8 +
  9 +#include <linux/module.h>
  10 +#include <net/xdp_sock.h>
  11 +#include <linux/xdp_diag.h>
  12 +#include <linux/sock_diag.h>
  13 +
  14 +#include "xsk_queue.h"
  15 +#include "xsk.h"
  16 +
  17 +static int xsk_diag_put_info(const struct xdp_sock *xs, struct sk_buff *nlskb)
  18 +{
  19 + struct xdp_diag_info di = {};
  20 +
  21 + di.ifindex = xs->dev ? xs->dev->ifindex : 0;
  22 + di.queue_id = xs->queue_id;
  23 + return nla_put(nlskb, XDP_DIAG_INFO, sizeof(di), &di);
  24 +}
  25 +
  26 +static int xsk_diag_put_ring(const struct xsk_queue *queue, int nl_type,
  27 + struct sk_buff *nlskb)
  28 +{
  29 + struct xdp_diag_ring dr = {};
  30 +
  31 + dr.entries = queue->nentries;
  32 + return nla_put(nlskb, nl_type, sizeof(dr), &dr);
  33 +}
  34 +
  35 +static int xsk_diag_put_rings_cfg(const struct xdp_sock *xs,
  36 + struct sk_buff *nlskb)
  37 +{
  38 + int err = 0;
  39 +
  40 + if (xs->rx)
  41 + err = xsk_diag_put_ring(xs->rx, XDP_DIAG_RX_RING, nlskb);
  42 + if (!err && xs->tx)
  43 + err = xsk_diag_put_ring(xs->tx, XDP_DIAG_TX_RING, nlskb);
  44 + return err;
  45 +}
  46 +
  47 +static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
  48 +{
  49 + struct xdp_umem *umem = xs->umem;
  50 + struct xdp_diag_umem du = {};
  51 + int err;
  52 +
  53 + if (!umem)
  54 + return 0;
  55 +
  56 + du.id = umem->id;
  57 + du.size = umem->size;
  58 + du.num_pages = umem->npgs;
  59 + du.chunk_size = (__u32)(~umem->chunk_mask + 1);
  60 + du.headroom = umem->headroom;
  61 + du.ifindex = umem->dev ? umem->dev->ifindex : 0;
  62 + du.queue_id = umem->queue_id;
  63 + du.flags = 0;
  64 + if (umem->zc)
  65 + du.flags |= XDP_DU_F_ZEROCOPY;
  66 + du.refs = refcount_read(&umem->users);
  67 +
  68 + err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du);
  69 +
  70 + if (!err && umem->fq)
  71 + err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb);
  72 + if (!err && umem->cq) {
  73 + err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING,
  74 + nlskb);
  75 + }
  76 + return err;
  77 +}
  78 +
  79 +static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
  80 + struct xdp_diag_req *req,
  81 + struct user_namespace *user_ns,
  82 + u32 portid, u32 seq, u32 flags, int sk_ino)
  83 +{
  84 + struct xdp_sock *xs = xdp_sk(sk);
  85 + struct xdp_diag_msg *msg;
  86 + struct nlmsghdr *nlh;
  87 +
  88 + nlh = nlmsg_put(nlskb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*msg),
  89 + flags);
  90 + if (!nlh)
  91 + return -EMSGSIZE;
  92 +
  93 + msg = nlmsg_data(nlh);
  94 + memset(msg, 0, sizeof(*msg));
  95 + msg->xdiag_family = AF_XDP;
  96 + msg->xdiag_type = sk->sk_type;
  97 + msg->xdiag_ino = sk_ino;
  98 + sock_diag_save_cookie(sk, msg->xdiag_cookie);
  99 +
  100 + if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
  101 + goto out_nlmsg_trim;
  102 +
  103 + if ((req->xdiag_show & XDP_SHOW_INFO) &&
  104 + nla_put_u32(nlskb, XDP_DIAG_UID,
  105 + from_kuid_munged(user_ns, sock_i_uid(sk))))
  106 + goto out_nlmsg_trim;
  107 +
  108 + if ((req->xdiag_show & XDP_SHOW_RING_CFG) &&
  109 + xsk_diag_put_rings_cfg(xs, nlskb))
  110 + goto out_nlmsg_trim;
  111 +
  112 + if ((req->xdiag_show & XDP_SHOW_UMEM) &&
  113 + xsk_diag_put_umem(xs, nlskb))
  114 + goto out_nlmsg_trim;
  115 +
  116 + if ((req->xdiag_show & XDP_SHOW_MEMINFO) &&
  117 + sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO))
  118 + goto out_nlmsg_trim;
  119 +
  120 + nlmsg_end(nlskb, nlh);
  121 + return 0;
  122 +
  123 +out_nlmsg_trim:
  124 + nlmsg_cancel(nlskb, nlh);
  125 + return -EMSGSIZE;
  126 +}
  127 +
  128 +static int xsk_diag_dump(struct sk_buff *nlskb, struct netlink_callback *cb)
  129 +{
  130 + struct xdp_diag_req *req = nlmsg_data(cb->nlh);
  131 + struct net *net = sock_net(nlskb->sk);
  132 + int num = 0, s_num = cb->args[0];
  133 + struct sock *sk;
  134 +
  135 + mutex_lock(&net->xdp.lock);
  136 +
  137 + sk_for_each(sk, &net->xdp.list) {
  138 + if (!net_eq(sock_net(sk), net))
  139 + continue;
  140 + if (num++ < s_num)
  141 + continue;
  142 +
  143 + if (xsk_diag_fill(sk, nlskb, req,
  144 + sk_user_ns(NETLINK_CB(cb->skb).sk),
  145 + NETLINK_CB(cb->skb).portid,
  146 + cb->nlh->nlmsg_seq, NLM_F_MULTI,
  147 + sock_i_ino(sk)) < 0) {
  148 + num--;
  149 + break;
  150 + }
  151 + }
  152 +
  153 + mutex_unlock(&net->xdp.lock);
  154 + cb->args[0] = num;
  155 + return nlskb->len;
  156 +}
  157 +
  158 +static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr)
  159 +{
  160 + struct netlink_dump_control c = { .dump = xsk_diag_dump };
  161 + int hdrlen = sizeof(struct xdp_diag_req);
  162 + struct net *net = sock_net(nlskb->sk);
  163 +
  164 + if (nlmsg_len(hdr) < hdrlen)
  165 + return -EINVAL;
  166 +
  167 + if (!(hdr->nlmsg_flags & NLM_F_DUMP))
  168 + return -EOPNOTSUPP;
  169 +
  170 + return netlink_dump_start(net->diag_nlsk, nlskb, hdr, &c);
  171 +}
  172 +
  173 +static const struct sock_diag_handler xsk_diag_handler = {
  174 + .family = AF_XDP,
  175 + .dump = xsk_diag_handler_dump,
  176 +};
  177 +
  178 +static int __init xsk_diag_init(void)
  179 +{
  180 + return sock_diag_register(&xsk_diag_handler);
  181 +}
  182 +
  183 +static void __exit xsk_diag_exit(void)
  184 +{
  185 + sock_diag_unregister(&xsk_diag_handler);
  186 +}
  187 +
  188 +module_init(xsk_diag_init);
  189 +module_exit(xsk_diag_exit);
  190 +MODULE_LICENSE("GPL");
  191 +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP);