From 9c1323736cf91aa46d43def8e8d2349f7498a203 Mon Sep 17 00:00:00 2001 From: Maryam Tahhan Date: Mon, 8 Apr 2024 09:09:21 -0400 Subject: [PATCH] net/af_xdp: fix multi-interface support for k8s [ upstream commit 9c1323736cf91aa46d43def8e8d2349f7498a203 ] The original 'use_cni' implementation was added to enable support for the AF_XDP PMD in a K8s env without any escalated privileges. However, 'use_cni' used a hardcoded socket rather than a configurable one. If a DPDK pod is requesting multiple net devices and these devices are from different pools, then the AF_XDP PMD attempts to mount all the netdev UDSes in the pod as /tmp/afxdp.sock, which means that at best only 1 netdev will handshake correctly with the AF_XDP DP. This patch addresses this by making the socket parameter configurable using a new vdev param called 'dp_path' alongside the original 'use_cni' param. If the 'dp_path' parameter is not set alongside the 'use_cni' parameter, then it's configured inside the AF_XDP PMD (transparently to the user). This change has been tested with the AF_XDP DP PR 81[1], with both single and multiple interfaces. 
[1] https://github.com/intel/afxdp-plugins-for-kubernetes/pull/81 Fixes: 7fc6ae50369d ("net/af_xdp: support CNI Integration") Cc: stable@dpdk.org Signed-off-by: Maryam Tahhan Acked-by: Ciara Loftus --- drivers/net/af_xdp/compat.h | 15 ++++ drivers/net/af_xdp/meson.build | 4 ++ drivers/net/af_xdp/rte_eth_af_xdp.c | 97 ++++++++++++++++---------- 3 files changed, 141 insertions(+), 55 deletions(-) diff --git a/drivers/net/af_xdp/compat.h b/drivers/net/af_xdp/compat.h index 28ea64aeaa..3b5a5c1ed5 100644 --- a/drivers/net/af_xdp/compat.h +++ b/drivers/net/af_xdp/compat.h @@ -46,6 +46,21 @@ create_shared_socket(struct xsk_socket **xsk_ptr __rte_unused, } #endif +#ifdef ETH_AF_XDP_UPDATE_XSKMAP +static __rte_always_inline int +update_xskmap(struct xsk_socket *xsk, int map_fd, int xsk_queue_idx __rte_unused) +{ + return xsk_socket__update_xskmap(xsk, map_fd); +} +#else +static __rte_always_inline int +update_xskmap(struct xsk_socket *xsk, int map_fd, int xsk_queue_idx) +{ + int fd = xsk_socket__fd(xsk); + return bpf_map_update_elem(map_fd, &xsk_queue_idx, &fd, 0); +} +#endif + #ifdef XDP_USE_NEED_WAKEUP static int tx_syscall_needed(struct xsk_ring_prod *q) diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build index 9f33e57fa2..280bfa8f80 100644 --- a/drivers/net/af_xdp/meson.build +++ b/drivers/net/af_xdp/meson.build @@ -77,6 +77,10 @@ if build dependencies : bpf_dep, args: cflags) cflags += ['-DRTE_NET_AF_XDP_LIBBPF_XDP_ATTACH'] endif + if cc.has_function('xsk_socket__update_xskmap', prefix : xsk_check_prefix, + dependencies : ext_deps, args: cflags) + cflags += ['-DETH_AF_XDP_UPDATE_XSKMAP'] + endif endif require_iova_in_mbuf = false diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c index 6ba455bb9b..dcd590569e 100644 --- a/drivers/net/af_xdp/rte_eth_af_xdp.c +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c @@ -83,12 +83,13 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, NOTICE); #define ETH_AF_XDP_MP_KEY 
"afxdp_mp_send_fds" +#define DP_BASE_PATH "/tmp/afxdp_dp" +#define DP_UDS_SOCK "afxdp.sock" #define MAX_LONG_OPT_SZ 64 #define UDS_MAX_FD_NUM 2 #define UDS_MAX_CMD_LEN 64 #define UDS_MAX_CMD_RESP 128 #define UDS_XSK_MAP_FD_MSG "/xsk_map_fd" -#define UDS_SOCK "/tmp/afxdp.sock" #define UDS_CONNECT_MSG "/connect" #define UDS_HOST_OK_MSG "/host_ok" #define UDS_HOST_NAK_MSG "/host_nak" @@ -171,6 +172,7 @@ struct pmd_internals { bool custom_prog_configured; bool force_copy; bool use_cni; + char dp_path[PATH_MAX]; struct bpf_map *map; struct rte_ether_addr eth_addr; @@ -191,6 +193,7 @@ struct pmd_process_private { #define ETH_AF_XDP_BUDGET_ARG "busy_budget" #define ETH_AF_XDP_FORCE_COPY_ARG "force_copy" #define ETH_AF_XDP_USE_CNI_ARG "use_cni" +#define ETH_AF_XDP_DP_PATH_ARG "dp_path" static const char * const valid_arguments[] = { ETH_AF_XDP_IFACE_ARG, @@ -201,6 +204,7 @@ static const char * const valid_arguments[] = { ETH_AF_XDP_BUDGET_ARG, ETH_AF_XDP_FORCE_COPY_ARG, ETH_AF_XDP_USE_CNI_ARG, + ETH_AF_XDP_DP_PATH_ARG, NULL }; @@ -1352,7 +1356,7 @@ configure_preferred_busy_poll(struct pkt_rx_queue *rxq) } static int -init_uds_sock(struct sockaddr_un *server) +init_uds_sock(struct sockaddr_un *server, const char *dp_path) { int sock; @@ -1363,7 +1367,7 @@ init_uds_sock(struct sockaddr_un *server) } server->sun_family = AF_UNIX; - strlcpy(server->sun_path, UDS_SOCK, sizeof(server->sun_path)); + strlcpy(server->sun_path, dp_path, sizeof(server->sun_path)); if (connect(sock, (struct sockaddr *)server, sizeof(struct sockaddr_un)) < 0) { close(sock); @@ -1383,7 +1387,7 @@ struct msg_internal { }; static int -send_msg(int sock, char *request, int *fd) +send_msg(int sock, char *request, int *fd, const char *dp_path) { int snd; struct iovec iov; @@ -1394,7 +1398,7 @@ send_msg(int sock, char *request, int *fd) memset(&dst, 0, sizeof(dst)); dst.sun_family = AF_UNIX; - strlcpy(dst.sun_path, UDS_SOCK, sizeof(dst.sun_path)); + strlcpy(dst.sun_path, dp_path, sizeof(dst.sun_path)); /* 
Initialize message header structure */ memset(&msgh, 0, sizeof(msgh)); @@ -1471,8 +1475,8 @@ read_msg(int sock, char *response, struct sockaddr_un *s, int *fd) } static int -make_request_cni(int sock, struct sockaddr_un *server, char *request, - int *req_fd, char *response, int *out_fd) +make_request_dp(int sock, struct sockaddr_un *server, char *request, + int *req_fd, char *response, int *out_fd, const char *dp_path) { int rval; @@ -1484,7 +1488,7 @@ make_request_cni(int sock, struct sockaddr_un *server, char *request, if (req_fd == NULL) rval = write(sock, request, strlen(request)); else - rval = send_msg(sock, request, req_fd); + rval = send_msg(sock, request, req_fd, dp_path); if (rval < 0) { AF_XDP_LOG(ERR, "Write error %s\n", strerror(errno)); @@ -1508,7 +1512,7 @@ check_response(char *response, char *exp_resp, long size) } static int -get_cni_fd(char *if_name) +uds_get_xskmap_fd(char *if_name, const char *dp_path) { char request[UDS_MAX_CMD_LEN], response[UDS_MAX_CMD_RESP]; char hostname[MAX_LONG_OPT_SZ], exp_resp[UDS_MAX_CMD_RESP]; @@ -1521,14 +1525,14 @@ get_cni_fd(char *if_name) return -1; memset(&server, 0, sizeof(server)); - sock = init_uds_sock(&server); + sock = init_uds_sock(&server, dp_path); if (sock < 0) return -1; - /* Initiates handshake to CNI send: /connect,hostname */ + /* Initiates handshake to the AF_XDP Device Plugin send: /connect,hostname */ snprintf(request, sizeof(request), "%s,%s", UDS_CONNECT_MSG, hostname); memset(response, 0, sizeof(response)); - if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) { + if (make_request_dp(sock, &server, request, NULL, response, &out_fd, dp_path) < 0) { AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request); goto err_close; } @@ -1542,7 +1546,7 @@ get_cni_fd(char *if_name) /* Request for "/version" */ strlcpy(request, UDS_VERSION_MSG, UDS_MAX_CMD_LEN); memset(response, 0, sizeof(response)); - if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) { + if 
(make_request_dp(sock, &server, request, NULL, response, &out_fd, dp_path) < 0) { AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request); goto err_close; } @@ -1550,7 +1554,7 @@ get_cni_fd(char *if_name) /* Request for file descriptor for netdev name*/ snprintf(request, sizeof(request), "%s,%s", UDS_XSK_MAP_FD_MSG, if_name); memset(response, 0, sizeof(response)); - if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) { + if (make_request_dp(sock, &server, request, NULL, response, &out_fd, dp_path) < 0) { AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request); goto err_close; } @@ -1572,7 +1576,7 @@ get_cni_fd(char *if_name) /* Initiate close connection */ strlcpy(request, UDS_FIN_MSG, UDS_MAX_CMD_LEN); memset(response, 0, sizeof(response)); - if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) { + if (make_request_dp(sock, &server, request, NULL, response, &out_fd, dp_path) < 0) { AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request); goto err_close; } @@ -1697,21 +1701,21 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, } if (internals->use_cni) { - int err, fd, map_fd; + int err, map_fd; - /* get socket fd from CNI plugin */ - map_fd = get_cni_fd(internals->if_name); + /* get socket fd from AF_XDP Device Plugin */ + map_fd = uds_get_xskmap_fd(internals->if_name, internals->dp_path); if (map_fd < 0) { - AF_XDP_LOG(ERR, "Failed to receive CNI plugin fd\n"); + AF_XDP_LOG(ERR, "Failed to receive xskmap fd from AF_XDP Device Plugin\n"); goto out_xsk; } - /* get socket fd */ - fd = xsk_socket__fd(rxq->xsk); - err = bpf_map_update_elem(map_fd, &rxq->xsk_queue_idx, &fd, 0); + + err = update_xskmap(rxq->xsk, map_fd, rxq->xsk_queue_idx); if (err) { - AF_XDP_LOG(ERR, "Failed to insert unprivileged xsk in map.\n"); + AF_XDP_LOG(ERR, "Failed to insert xsk in map.\n"); goto out_xsk; } + } else if (rxq->busy_budget) { ret = configure_preferred_busy_poll(rxq); if (ret) { @@ -1883,13 +1887,13 @@ 
static const struct eth_dev_ops ops = { .get_monitor_addr = eth_get_monitor_addr, }; -/* CNI option works in unprivileged container environment - * and ethernet device functionality will be reduced. So - * additional customiszed eth_dev_ops struct is needed - * for cni. Promiscuous enable and disable functionality - * is removed. +/* AF_XDP Device Plugin option works in unprivileged + * container environments and ethernet device functionality + * will be reduced. So additional customised eth_dev_ops + * struct is needed for the Device Plugin. Promiscuous + * enable and disable functionality is removed. **/ -static const struct eth_dev_ops ops_cni = { +static const struct eth_dev_ops ops_afxdp_dp = { .dev_start = eth_dev_start, .dev_stop = eth_dev_stop, .dev_close = eth_dev_close, @@ -2025,7 +2029,8 @@ xdp_get_channels_info(const char *if_name, int *max_queues, static int parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue, int *queue_cnt, int *shared_umem, char *prog_path, - int *busy_budget, int *force_copy, int *use_cni) + int *busy_budget, int *force_copy, int *use_cni, + char *dp_path) { int ret; @@ -2071,6 +2076,11 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue, if (ret < 0) goto free_kvlist; + ret = rte_kvargs_process(kvlist, ETH_AF_XDP_DP_PATH_ARG, + &parse_prog_arg, dp_path); + if (ret < 0) + goto free_kvlist; + free_kvlist: rte_kvargs_free(kvlist); return ret; @@ -2110,7 +2120,7 @@ static struct rte_eth_dev * init_internals(struct rte_vdev_device *dev, const char *if_name, int start_queue_idx, int queue_cnt, int shared_umem, const char *prog_path, int busy_budget, int force_copy, - int use_cni) + int use_cni, const char *dp_path) { const char *name = rte_vdev_device_name(dev); const unsigned int numa_node = dev->device.numa_node; @@ -2140,6 +2150,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, internals->shared_umem = shared_umem; internals->force_copy = force_copy; 
internals->use_cni = use_cni; + strlcpy(internals->dp_path, dp_path, PATH_MAX); if (xdp_get_channels_info(if_name, &internals->max_queue_cnt, &internals->combined_queue_cnt)) { @@ -2201,7 +2212,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, if (!internals->use_cni) eth_dev->dev_ops = &ops; else - eth_dev->dev_ops = &ops_cni; + eth_dev->dev_ops = &ops_afxdp_dp; eth_dev->rx_pkt_burst = eth_af_xdp_rx; eth_dev->tx_pkt_burst = eth_af_xdp_tx; @@ -2330,6 +2341,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) int busy_budget = -1, ret; int force_copy = 0; int use_cni = 0; + char dp_path[PATH_MAX] = {'\0'}; struct rte_eth_dev *eth_dev = NULL; const char *name = rte_vdev_device_name(dev); @@ -2372,7 +2384,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx, &xsk_queue_cnt, &shared_umem, prog_path, - &busy_budget, &force_copy, &use_cni) < 0) { + &busy_budget, &force_copy, &use_cni, dp_path) < 0) { AF_XDP_LOG(ERR, "Invalid kvargs value\n"); return -EINVAL; } @@ -2386,7 +2398,19 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) if (use_cni && strnlen(prog_path, PATH_MAX)) { AF_XDP_LOG(ERR, "When '%s' parameter is used, '%s' parameter is not valid\n", ETH_AF_XDP_USE_CNI_ARG, ETH_AF_XDP_PROG_ARG); - return -EINVAL; + return -EINVAL; + } + + if (use_cni && !strnlen(dp_path, PATH_MAX)) { + snprintf(dp_path, sizeof(dp_path), "%s/%s/%s", DP_BASE_PATH, if_name, DP_UDS_SOCK); + AF_XDP_LOG(INFO, "'%s' parameter not provided, setting value to '%s'\n", + ETH_AF_XDP_DP_PATH_ARG, dp_path); + } + + if (!use_cni && strnlen(dp_path, PATH_MAX)) { + AF_XDP_LOG(ERR, "'%s' parameter is set, but '%s' was not enabled\n", + ETH_AF_XDP_DP_PATH_ARG, ETH_AF_XDP_USE_CNI_ARG); + return -EINVAL; } if (strlen(if_name) == 0) { @@ -2412,7 +2436,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) eth_dev = init_internals(dev, if_name, xsk_start_queue_idx, xsk_queue_cnt, shared_umem, prog_path, - busy_budget, 
force_copy, use_cni); + busy_budget, force_copy, use_cni, dp_path); if (eth_dev == NULL) { AF_XDP_LOG(ERR, "Failed to init internals\n"); return -1; @@ -2473,4 +2497,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp, "xdp_prog= " "busy_budget= " "force_copy= " - "use_cni= "); + "use_cni= " + "dp_path= "); -- 2.33.0