dpdk/0469-net-af_xdp-fix-multi-interface-support-for-k8s.patch

389 lines
14 KiB
Diff

From 9c1323736cf91aa46d43def8e8d2349f7498a203 Mon Sep 17 00:00:00 2001
From: Maryam Tahhan <mtahhan@redhat.com>
Date: Mon, 8 Apr 2024 09:09:21 -0400
Subject: [PATCH] net/af_xdp: fix multi-interface support for k8s
[ upstream commit 9c1323736cf91aa46d43def8e8d2349f7498a203 ]
The original 'use_cni' implementation was added
to enable support for the AF_XDP PMD in a K8s env
without any escalated privileges.
However, 'use_cni' used a hardcoded socket rather
than a configurable one. If a DPDK pod is requesting
multiple net devices and these devices are from
different pools, then the AF_XDP PMD attempts to
mount all the netdev UDSes in the pod as /tmp/afxdp.sock,
which means that at best only 1 netdev will handshake
correctly with the AF_XDP DP. This patch addresses
this by making the socket parameter configurable using
a new vdev param called 'dp_path' alongside the
original 'use_cni' param. If the 'dp_path' parameter
is not set alongside the 'use_cni' parameter, then
it's configured inside the AF_XDP PMD (transparently
to the user). This change has been tested
with the AF_XDP DP PR 81[1], with both single and
multiple interfaces.
[1] https://github.com/intel/afxdp-plugins-for-kubernetes/pull/81
Fixes: 7fc6ae50369d ("net/af_xdp: support CNI Integration")
Cc: stable@dpdk.org
Signed-off-by: Maryam Tahhan <mtahhan@redhat.com>
Acked-by: Ciara Loftus <ciara.loftus@intel.com>
---
drivers/net/af_xdp/compat.h | 15 ++++
drivers/net/af_xdp/meson.build | 4 ++
drivers/net/af_xdp/rte_eth_af_xdp.c | 97 ++++++++++++++++----------
3 files changed, 141 insertions(+), 55 deletions(-)
diff --git a/drivers/net/af_xdp/compat.h b/drivers/net/af_xdp/compat.h
index 28ea64aeaa..3b5a5c1ed5 100644
--- a/drivers/net/af_xdp/compat.h
+++ b/drivers/net/af_xdp/compat.h
@@ -46,6 +46,21 @@ create_shared_socket(struct xsk_socket **xsk_ptr __rte_unused,
}
#endif
+#ifdef ETH_AF_XDP_UPDATE_XSKMAP
+static __rte_always_inline int
+update_xskmap(struct xsk_socket *xsk, int map_fd, int xsk_queue_idx __rte_unused)
+{
+ return xsk_socket__update_xskmap(xsk, map_fd);
+}
+#else
+static __rte_always_inline int
+update_xskmap(struct xsk_socket *xsk, int map_fd, int xsk_queue_idx)
+{
+ int fd = xsk_socket__fd(xsk);
+ return bpf_map_update_elem(map_fd, &xsk_queue_idx, &fd, 0);
+}
+#endif
+
#ifdef XDP_USE_NEED_WAKEUP
static int
tx_syscall_needed(struct xsk_ring_prod *q)
diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build
index 9f33e57fa2..280bfa8f80 100644
--- a/drivers/net/af_xdp/meson.build
+++ b/drivers/net/af_xdp/meson.build
@@ -77,6 +77,10 @@ if build
dependencies : bpf_dep, args: cflags)
cflags += ['-DRTE_NET_AF_XDP_LIBBPF_XDP_ATTACH']
endif
+ if cc.has_function('xsk_socket__update_xskmap', prefix : xsk_check_prefix,
+ dependencies : ext_deps, args: cflags)
+ cflags += ['-DETH_AF_XDP_UPDATE_XSKMAP']
+ endif
endif
require_iova_in_mbuf = false
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 6ba455bb9b..dcd590569e 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -83,12 +83,13 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, NOTICE);
#define ETH_AF_XDP_MP_KEY "afxdp_mp_send_fds"
+#define DP_BASE_PATH "/tmp/afxdp_dp"
+#define DP_UDS_SOCK "afxdp.sock"
#define MAX_LONG_OPT_SZ 64
#define UDS_MAX_FD_NUM 2
#define UDS_MAX_CMD_LEN 64
#define UDS_MAX_CMD_RESP 128
#define UDS_XSK_MAP_FD_MSG "/xsk_map_fd"
-#define UDS_SOCK "/tmp/afxdp.sock"
#define UDS_CONNECT_MSG "/connect"
#define UDS_HOST_OK_MSG "/host_ok"
#define UDS_HOST_NAK_MSG "/host_nak"
@@ -171,6 +172,7 @@ struct pmd_internals {
bool custom_prog_configured;
bool force_copy;
bool use_cni;
+ char dp_path[PATH_MAX];
struct bpf_map *map;
struct rte_ether_addr eth_addr;
@@ -191,6 +193,7 @@ struct pmd_process_private {
#define ETH_AF_XDP_BUDGET_ARG "busy_budget"
#define ETH_AF_XDP_FORCE_COPY_ARG "force_copy"
#define ETH_AF_XDP_USE_CNI_ARG "use_cni"
+#define ETH_AF_XDP_DP_PATH_ARG "dp_path"
static const char * const valid_arguments[] = {
ETH_AF_XDP_IFACE_ARG,
@@ -201,6 +204,7 @@ static const char * const valid_arguments[] = {
ETH_AF_XDP_BUDGET_ARG,
ETH_AF_XDP_FORCE_COPY_ARG,
ETH_AF_XDP_USE_CNI_ARG,
+ ETH_AF_XDP_DP_PATH_ARG,
NULL
};
@@ -1352,7 +1356,7 @@ configure_preferred_busy_poll(struct pkt_rx_queue *rxq)
}
static int
-init_uds_sock(struct sockaddr_un *server)
+init_uds_sock(struct sockaddr_un *server, const char *dp_path)
{
int sock;
@@ -1363,7 +1367,7 @@ init_uds_sock(struct sockaddr_un *server)
}
server->sun_family = AF_UNIX;
- strlcpy(server->sun_path, UDS_SOCK, sizeof(server->sun_path));
+ strlcpy(server->sun_path, dp_path, sizeof(server->sun_path));
if (connect(sock, (struct sockaddr *)server, sizeof(struct sockaddr_un)) < 0) {
close(sock);
@@ -1383,7 +1387,7 @@ struct msg_internal {
};
static int
-send_msg(int sock, char *request, int *fd)
+send_msg(int sock, char *request, int *fd, const char *dp_path)
{
int snd;
struct iovec iov;
@@ -1394,7 +1398,7 @@ send_msg(int sock, char *request, int *fd)
memset(&dst, 0, sizeof(dst));
dst.sun_family = AF_UNIX;
- strlcpy(dst.sun_path, UDS_SOCK, sizeof(dst.sun_path));
+ strlcpy(dst.sun_path, dp_path, sizeof(dst.sun_path));
/* Initialize message header structure */
memset(&msgh, 0, sizeof(msgh));
@@ -1471,8 +1475,8 @@ read_msg(int sock, char *response, struct sockaddr_un *s, int *fd)
}
static int
-make_request_cni(int sock, struct sockaddr_un *server, char *request,
- int *req_fd, char *response, int *out_fd)
+make_request_dp(int sock, struct sockaddr_un *server, char *request,
+ int *req_fd, char *response, int *out_fd, const char *dp_path)
{
int rval;
@@ -1484,7 +1488,7 @@ make_request_cni(int sock, struct sockaddr_un *server, char *request,
if (req_fd == NULL)
rval = write(sock, request, strlen(request));
else
- rval = send_msg(sock, request, req_fd);
+ rval = send_msg(sock, request, req_fd, dp_path);
if (rval < 0) {
AF_XDP_LOG(ERR, "Write error %s\n", strerror(errno));
@@ -1508,7 +1512,7 @@ check_response(char *response, char *exp_resp, long size)
}
static int
-get_cni_fd(char *if_name)
+uds_get_xskmap_fd(char *if_name, const char *dp_path)
{
char request[UDS_MAX_CMD_LEN], response[UDS_MAX_CMD_RESP];
char hostname[MAX_LONG_OPT_SZ], exp_resp[UDS_MAX_CMD_RESP];
@@ -1521,14 +1525,14 @@ get_cni_fd(char *if_name)
return -1;
memset(&server, 0, sizeof(server));
- sock = init_uds_sock(&server);
+ sock = init_uds_sock(&server, dp_path);
if (sock < 0)
return -1;
- /* Initiates handshake to CNI send: /connect,hostname */
+ /* Initiates handshake to the AF_XDP Device Plugin send: /connect,hostname */
snprintf(request, sizeof(request), "%s,%s", UDS_CONNECT_MSG, hostname);
memset(response, 0, sizeof(response));
- if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) {
+ if (make_request_dp(sock, &server, request, NULL, response, &out_fd, dp_path) < 0) {
AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request);
goto err_close;
}
@@ -1542,7 +1546,7 @@ get_cni_fd(char *if_name)
/* Request for "/version" */
strlcpy(request, UDS_VERSION_MSG, UDS_MAX_CMD_LEN);
memset(response, 0, sizeof(response));
- if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) {
+ if (make_request_dp(sock, &server, request, NULL, response, &out_fd, dp_path) < 0) {
AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request);
goto err_close;
}
@@ -1550,7 +1554,7 @@ get_cni_fd(char *if_name)
/* Request for file descriptor for netdev name*/
snprintf(request, sizeof(request), "%s,%s", UDS_XSK_MAP_FD_MSG, if_name);
memset(response, 0, sizeof(response));
- if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) {
+ if (make_request_dp(sock, &server, request, NULL, response, &out_fd, dp_path) < 0) {
AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request);
goto err_close;
}
@@ -1572,7 +1576,7 @@ get_cni_fd(char *if_name)
/* Initiate close connection */
strlcpy(request, UDS_FIN_MSG, UDS_MAX_CMD_LEN);
memset(response, 0, sizeof(response));
- if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) {
+ if (make_request_dp(sock, &server, request, NULL, response, &out_fd, dp_path) < 0) {
AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request);
goto err_close;
}
@@ -1697,21 +1701,21 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
}
if (internals->use_cni) {
- int err, fd, map_fd;
+ int err, map_fd;
- /* get socket fd from CNI plugin */
- map_fd = get_cni_fd(internals->if_name);
+ /* get socket fd from AF_XDP Device Plugin */
+ map_fd = uds_get_xskmap_fd(internals->if_name, internals->dp_path);
if (map_fd < 0) {
- AF_XDP_LOG(ERR, "Failed to receive CNI plugin fd\n");
+ AF_XDP_LOG(ERR, "Failed to receive xskmap fd from AF_XDP Device Plugin\n");
goto out_xsk;
}
- /* get socket fd */
- fd = xsk_socket__fd(rxq->xsk);
- err = bpf_map_update_elem(map_fd, &rxq->xsk_queue_idx, &fd, 0);
+
+ err = update_xskmap(rxq->xsk, map_fd, rxq->xsk_queue_idx);
if (err) {
- AF_XDP_LOG(ERR, "Failed to insert unprivileged xsk in map.\n");
+ AF_XDP_LOG(ERR, "Failed to insert xsk in map.\n");
goto out_xsk;
}
+
} else if (rxq->busy_budget) {
ret = configure_preferred_busy_poll(rxq);
if (ret) {
@@ -1883,13 +1887,13 @@ static const struct eth_dev_ops ops = {
.get_monitor_addr = eth_get_monitor_addr,
};
-/* CNI option works in unprivileged container environment
- * and ethernet device functionality will be reduced. So
- * additional customiszed eth_dev_ops struct is needed
- * for cni. Promiscuous enable and disable functionality
- * is removed.
+/* AF_XDP Device Plugin option works in unprivileged
+ * container environments and ethernet device functionality
+ * will be reduced. So additional customised eth_dev_ops
+ * struct is needed for the Device Plugin. Promiscuous
+ * enable and disable functionality is removed.
**/
-static const struct eth_dev_ops ops_cni = {
+static const struct eth_dev_ops ops_afxdp_dp = {
.dev_start = eth_dev_start,
.dev_stop = eth_dev_stop,
.dev_close = eth_dev_close,
@@ -2025,7 +2029,8 @@ xdp_get_channels_info(const char *if_name, int *max_queues,
static int
parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
int *queue_cnt, int *shared_umem, char *prog_path,
- int *busy_budget, int *force_copy, int *use_cni)
+ int *busy_budget, int *force_copy, int *use_cni,
+ char *dp_path)
{
int ret;
@@ -2071,6 +2076,11 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
if (ret < 0)
goto free_kvlist;
+ ret = rte_kvargs_process(kvlist, ETH_AF_XDP_DP_PATH_ARG,
+ &parse_prog_arg, dp_path);
+ if (ret < 0)
+ goto free_kvlist;
+
free_kvlist:
rte_kvargs_free(kvlist);
return ret;
@@ -2110,7 +2120,7 @@ static struct rte_eth_dev *
init_internals(struct rte_vdev_device *dev, const char *if_name,
int start_queue_idx, int queue_cnt, int shared_umem,
const char *prog_path, int busy_budget, int force_copy,
- int use_cni)
+ int use_cni, const char *dp_path)
{
const char *name = rte_vdev_device_name(dev);
const unsigned int numa_node = dev->device.numa_node;
@@ -2140,6 +2150,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
internals->shared_umem = shared_umem;
internals->force_copy = force_copy;
internals->use_cni = use_cni;
+ strlcpy(internals->dp_path, dp_path, PATH_MAX);
if (xdp_get_channels_info(if_name, &internals->max_queue_cnt,
&internals->combined_queue_cnt)) {
@@ -2201,7 +2212,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
if (!internals->use_cni)
eth_dev->dev_ops = &ops;
else
- eth_dev->dev_ops = &ops_cni;
+ eth_dev->dev_ops = &ops_afxdp_dp;
eth_dev->rx_pkt_burst = eth_af_xdp_rx;
eth_dev->tx_pkt_burst = eth_af_xdp_tx;
@@ -2330,6 +2341,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
int busy_budget = -1, ret;
int force_copy = 0;
int use_cni = 0;
+ char dp_path[PATH_MAX] = {'\0'};
struct rte_eth_dev *eth_dev = NULL;
const char *name = rte_vdev_device_name(dev);
@@ -2372,7 +2384,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
&xsk_queue_cnt, &shared_umem, prog_path,
- &busy_budget, &force_copy, &use_cni) < 0) {
+ &busy_budget, &force_copy, &use_cni, dp_path) < 0) {
AF_XDP_LOG(ERR, "Invalid kvargs value\n");
return -EINVAL;
}
@@ -2386,7 +2398,19 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
if (use_cni && strnlen(prog_path, PATH_MAX)) {
AF_XDP_LOG(ERR, "When '%s' parameter is used, '%s' parameter is not valid\n",
ETH_AF_XDP_USE_CNI_ARG, ETH_AF_XDP_PROG_ARG);
- return -EINVAL;
+ return -EINVAL;
+ }
+
+ if (use_cni && !strnlen(dp_path, PATH_MAX)) {
+ snprintf(dp_path, sizeof(dp_path), "%s/%s/%s", DP_BASE_PATH, if_name, DP_UDS_SOCK);
+ AF_XDP_LOG(INFO, "'%s' parameter not provided, setting value to '%s'\n",
+ ETH_AF_XDP_DP_PATH_ARG, dp_path);
+ }
+
+ if (!use_cni && strnlen(dp_path, PATH_MAX)) {
+ AF_XDP_LOG(ERR, "'%s' parameter is set, but '%s' was not enabled\n",
+ ETH_AF_XDP_DP_PATH_ARG, ETH_AF_XDP_USE_CNI_ARG);
+ return -EINVAL;
}
if (strlen(if_name) == 0) {
@@ -2412,7 +2436,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
xsk_queue_cnt, shared_umem, prog_path,
- busy_budget, force_copy, use_cni);
+ busy_budget, force_copy, use_cni, dp_path);
if (eth_dev == NULL) {
AF_XDP_LOG(ERR, "Failed to init internals\n");
return -1;
@@ -2473,4 +2497,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
"xdp_prog=<string> "
"busy_budget=<int> "
"force_copy=<int> "
- "use_cni=<int> ");
+ "use_cni=<int> "
+ "dp_path=<string> ");
--
2.33.0