810 lines
24 KiB
Diff
810 lines
24 KiB
Diff
From 7fc6ae50369d75b9aa550072182fa92f8c4e13a4 Mon Sep 17 00:00:00 2001
|
|
From: Shibin Koikkara Reeny <shibin.koikkara.reeny@intel.com>
|
|
Date: Wed, 15 Feb 2023 16:30:14 +0000
|
|
Subject: [PATCH] net/af_xdp: support CNI Integration
|
|
|
|
[ upstream commit 7fc6ae50369d75b9aa550072182fa92f8c4e13a4 ]
|
|
|
|
Integrate support for the AF_XDP CNI and device plugin [1] so that the
|
|
DPDK AF_XDP PMD can work in an unprivileged container environment.
|
|
|
|
Part of the AF_XDP PMD initialization process involves loading an eBPF
|
|
program onto the given netdev. This operation requires privileges, which
|
|
prevents the PMD from being able to work in an unprivileged container
|
|
(without root access). The plugin CNI handles the program loading. CNI
|
|
opens a Unix Domain Socket (UDS) and listens for a client to make
|
|
requests over that UDS. The client (DPDK) connects and a "handshake"
|
|
occurs, then the File Descriptor which points to the XSKMAP associated
|
|
with the loaded eBPF program is handed over to the client. The client
|
|
can then proceed with creating an AF_XDP socket and inserting the socket
|
|
into the XSKMAP pointed to by the FD received on the UDS.
|
|
|
|
A new vdev arg "use_cni" is created to indicate that the user wishes to run
|
|
the PMD in unprivileged mode and to receive the XSKMAP FD from the CNI.
|
|
|
|
When this flag is set, the XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD libbpf
|
|
flag should be used when creating the socket, which tells libbpf not to
|
|
load the default libbpf program on the netdev. We tell libbpf not to do
|
|
this because the loading is handled by the CNI in this scenario.
|
|
|
|
The patch includes a howto doc explaining how to configure the AF_XDP CNI to work
|
|
with DPDK.
|
|
|
|
[1]: https://github.com/intel/afxdp-plugins-for-kubernetes
|
|
|
|
Signed-off-by: Shibin Koikkara Reeny <shibin.koikkara.reeny@intel.com>
|
|
Tested-by: Anatoly Burakov <anatoly.burakov@intel.com>
|
|
Reviewed-by: Qi Zhang <qi.z.zhang@intel.com>
|
|
Acked-by: John McNamara <john.mcnamara@intel.com>
|
|
Acked-by: Ferruh Yigit <ferruh.yigit@amd.com>
|
|
---
|
|
doc/guides/howto/af_xdp_cni.rst | 255 ++++++++++++++++++++
|
|
doc/guides/howto/index.rst | 1 +
|
|
drivers/net/af_xdp/rte_eth_af_xdp.c | 349 +++++++++++++++++++++++++++-
|
|
3 files changed, 613 insertions(+), 11 deletions(-)
|
|
create mode 100644 doc/guides/howto/af_xdp_cni.rst
|
|
|
|
diff --git a/doc/guides/howto/af_xdp_cni.rst b/doc/guides/howto/af_xdp_cni.rst
|
|
new file mode 100644
|
|
index 0000000000..a38f02450e
|
|
--- /dev/null
|
|
+++ b/doc/guides/howto/af_xdp_cni.rst
|
|
@@ -0,0 +1,255 @@
|
|
+.. SPDX-License-Identifier: BSD-3-Clause
|
|
+ Copyright(c) 2023 Intel Corporation.
|
|
+
|
|
+Using a CNI with the AF_XDP driver
|
|
+==================================
|
|
+
|
|
+Introduction
|
|
+------------
|
|
+
|
|
+CNI, the Container Network Interface, is a technology for configuring
|
|
+container network interfaces
|
|
+and can be used to set up Kubernetes networking.
|
|
+AF_XDP is a Linux socket Address Family that enables an XDP program
|
|
+to redirect packets to a memory buffer in userspace.
|
|
+
|
|
+This document explains how to enable the `AF_XDP Plugin for Kubernetes`_ within
|
|
+a DPDK application using the `AF_XDP PMD`_ to connect and use these technologies.
|
|
+
|
|
+.. _AF_XDP Plugin for Kubernetes: https://github.com/intel/afxdp-plugins-for-kubernetes
|
|
+
|
|
+
|
|
+Background
|
|
+----------
|
|
+
|
|
+The standard `AF_XDP PMD`_ initialization process involves loading an eBPF program
|
|
+onto the kernel netdev to be used by the PMD.
|
|
+This operation requires root or escalated Linux privileges
|
|
+and thus prevents the PMD from working in an unprivileged container.
|
|
+The AF_XDP CNI plugin handles this situation
|
|
+by providing a device plugin that performs the program loading.
|
|
+
|
|
+At a technical level the CNI opens a Unix Domain Socket and listens for a client
|
|
+to make requests over that socket.
|
|
+A DPDK application acting as a client connects and initiates a configuration "handshake".
|
|
+The client then receives a file descriptor which points to the XSKMAP
|
|
+associated with the loaded eBPF program.
|
|
+The XSKMAP is a BPF map of AF_XDP sockets (XSK).
|
|
+The client can then proceed with creating an AF_XDP socket
|
|
+and inserting that socket into the XSKMAP pointed to by the descriptor.
|
|
+
|
|
+The EAL vdev argument ``use_cni`` is used to indicate that the user wishes
|
|
+to run the PMD in unprivileged mode and to receive the XSKMAP file descriptor
|
|
+from the CNI.
|
|
+When this flag is set,
|
|
+the ``XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD`` libbpf flag
|
|
+should be used when creating the socket
|
|
+to instruct libbpf not to load the default libbpf program on the netdev.
|
|
+Instead the loading is handled by the CNI.
|
|
+
|
|
+.. _AF_XDP PMD: https://doc.dpdk.org/guides/nics/af_xdp.html
|
|
+
|
|
+.. note::
|
|
+
|
|
+ The Unix Domain Socket file path that appears to the end user is "/tmp/afxdp.sock".
|
|
+
|
|
+
|
|
+Prerequisites
|
|
+-------------
|
|
+
|
|
+Docker and container prerequisites:
|
|
+
|
|
+* Set up the device plugin
|
|
+ as described in the instructions for `AF_XDP Plugin for Kubernetes`_.
|
|
+
|
|
+* The Docker image should contain the libbpf and libxdp libraries,
|
|
+ which are dependencies for AF_XDP,
|
|
+ and should include support for the ``ethtool`` command.
|
|
+
|
|
+* The Pod should have enabled the capabilities ``CAP_NET_RAW`` and ``CAP_BPF``
|
|
+ for AF_XDP along with support for hugepages.
|
|
+
|
|
+* Increase locked memory limit so containers have enough memory for packet buffers.
|
|
+ For example:
|
|
+
|
|
+ .. code-block:: console
|
|
+
|
|
+ cat << EOF | sudo tee /etc/systemd/system/containerd.service.d/limits.conf
|
|
+ [Service]
|
|
+ LimitMEMLOCK=infinity
|
|
+ EOF
|
|
+
|
|
+* dpdk-testpmd application should have AF_XDP feature enabled.
|
|
+
|
|
+ For further information see :doc:`../../nics/af_xdp`.
|
|
+
|
|
+
|
|
+Example
|
|
+-------
|
|
+
|
|
+How to run dpdk-testpmd with the CNI plugin:
|
|
+
|
|
+* Clone the CNI plugin
|
|
+
|
|
+ .. code-block:: console
|
|
+
|
|
+ # git clone https://github.com/intel/afxdp-plugins-for-kubernetes.git
|
|
+
|
|
+* Build the CNI plugin
|
|
+
|
|
+ .. code-block:: console
|
|
+
|
|
+ # cd afxdp-plugins-for-kubernetes/
|
|
+ # make build
|
|
+
|
|
+ .. note::
|
|
+
|
|
+ The CNI plugin has a dependency on the config.json file.
|
|
+
|
|
+ Sample config.json:
|
|
+
|
|
+ .. code-block:: json
|
|
+
|
|
+ {
|
|
+ "logLevel":"debug",
|
|
+ "logFile":"afxdp-dp-e2e.log",
|
|
+ "pools":[
|
|
+ {
|
|
+ "name":"e2e",
|
|
+ "mode":"primary",
|
|
+ "timeout":30,
|
|
+ "ethtoolCmds" : ["-L -device- combined 1"],
|
|
+ "devices":[
|
|
+ {
|
|
+ "name":"ens785f0"
|
|
+ }
|
|
+ ]
|
|
+ }
|
|
+ ]
|
|
+ }
|
|
+
|
|
+ For further reference please use the `config.json`_
|
|
+
|
|
+ .. _config.json: https://github.com/intel/afxdp-plugins-for-kubernetes/blob/v0.0.2/test/e2e/config.json
|
|
+
|
|
+* Create the Network Attachment definition
|
|
+
|
|
+ .. code-block:: console
|
|
+
|
|
+ # kubectl create -f nad.yaml
|
|
+
|
|
+ Sample nad.yaml:
|
|
+
|
|
+ .. code-block:: yaml
|
|
+
|
|
+ apiVersion: "k8s.cni.cncf.io/v1"
|
|
+ kind: NetworkAttachmentDefinition
|
|
+ metadata:
|
|
+ name: afxdp-e2e-test
|
|
+ annotations:
|
|
+ k8s.v1.cni.cncf.io/resourceName: afxdp/e2e
|
|
+ spec:
|
|
+ config: '{
|
|
+ "cniVersion": "0.3.0",
|
|
+ "type": "afxdp",
|
|
+ "mode": "cdq",
|
|
+ "logFile": "afxdp-cni-e2e.log",
|
|
+ "logLevel": "debug",
|
|
+ "ipam": {
|
|
+ "type": "host-local",
|
|
+ "subnet": "192.168.1.0/24",
|
|
+ "rangeStart": "192.168.1.200",
|
|
+ "rangeEnd": "192.168.1.216",
|
|
+ "routes": [
|
|
+ { "dst": "0.0.0.0/0" }
|
|
+ ],
|
|
+ "gateway": "192.168.1.1"
|
|
+ }
|
|
+ }'
|
|
+
|
|
+ For further reference please use the `nad.yaml`_
|
|
+
|
|
+ .. _nad.yaml: https://github.com/intel/afxdp-plugins-for-kubernetes/blob/v0.0.2/test/e2e/nad.yaml
|
|
+
|
|
+* Build the Docker image
|
|
+
|
|
+ .. code-block:: console
|
|
+
|
|
+ # docker build -t afxdp-e2e-test -f Dockerfile .
|
|
+
|
|
+ Sample Dockerfile:
|
|
+
|
|
+ .. code-block:: console
|
|
+
|
|
+ FROM ubuntu:20.04
|
|
+ RUN apt-get update -y
|
|
+ RUN apt install build-essential libelf-dev -y
|
|
+ RUN apt-get install iproute2 acl -y
|
|
+ RUN apt install python3-pyelftools ethtool -y
|
|
+ RUN apt install libnuma-dev libjansson-dev libpcap-dev net-tools -y
|
|
+ RUN apt-get install clang llvm -y
|
|
+ COPY ./libbpf<version>.tar.gz /tmp
|
|
+ RUN cd /tmp && tar -xvmf libbpf<version>.tar.gz && cd libbpf/src && make install
|
|
+ COPY ./libxdp<version>.tar.gz /tmp
|
|
+ RUN cd /tmp && tar -xvmf libxdp<version>.tar.gz && cd libxdp && make install
|
|
+
|
|
+ .. note::
|
|
+
|
|
+ All the files that need to be COPY-ed should be in the same directory as the Dockerfile.
|
|
+
|
|
+* Run the Pod
|
|
+
|
|
+ .. code-block:: console
|
|
+
|
|
+ # kubectl create -f pod.yaml
|
|
+
|
|
+ Sample pod.yaml:
|
|
+
|
|
+ .. code-block:: yaml
|
|
+
|
|
+ apiVersion: v1
|
|
+ kind: Pod
|
|
+ metadata:
|
|
+ name: afxdp-e2e-test
|
|
+ annotations:
|
|
+ k8s.v1.cni.cncf.io/networks: afxdp-e2e-test
|
|
+ spec:
|
|
+ containers:
|
|
+ - name: afxdp
|
|
+ image: afxdp-e2e-test:latest
|
|
+ imagePullPolicy: Never
|
|
+ env:
|
|
+ - name: LD_LIBRARY_PATH
|
|
+ value: /usr/lib64/:/usr/local/lib/
|
|
+ command: ["tail", "-f", "/dev/null"]
|
|
+ securityContext:
|
|
+ capabilities:
|
|
+ add:
|
|
+ - CAP_NET_RAW
|
|
+ - CAP_BPF
|
|
+ resources:
|
|
+ requests:
|
|
+ hugepages-2Mi: 2Gi
|
|
+ memory: 2Gi
|
|
+ afxdp/e2e: '1'
|
|
+ limits:
|
|
+ hugepages-2Mi: 2Gi
|
|
+ memory: 2Gi
|
|
+ afxdp/e2e: '1'
|
|
+
|
|
+ For further reference please use the `pod.yaml`_
|
|
+
|
|
+ .. _pod.yaml: https://github.com/intel/afxdp-plugins-for-kubernetes/blob/v0.0.2/test/e2e/pod-1c1d.yaml
|
|
+
|
|
+* Run DPDK with a command like the following:
|
|
+
|
|
+ .. code-block:: console
|
|
+
|
|
+ kubectl exec -i <Pod name> --container <containers name> -- \
|
|
+ /<Path>/dpdk-testpmd -l 0,1 --no-pci \
|
|
+ --vdev=net_af_xdp0,use_cni=1,iface=<interface name> \
|
|
+ -- --no-mlockall --in-memory
|
|
+
|
|
+For further reference please use the `e2e`_ test case in `AF_XDP Plugin for Kubernetes`_
|
|
+
|
|
+ .. _e2e: https://github.com/intel/afxdp-plugins-for-kubernetes/tree/v0.0.2/test/e2e
|
|
diff --git a/doc/guides/howto/index.rst b/doc/guides/howto/index.rst
|
|
index bf6337d021..71a3381c36 100644
|
|
--- a/doc/guides/howto/index.rst
|
|
+++ b/doc/guides/howto/index.rst
|
|
@@ -8,6 +8,7 @@ HowTo Guides
|
|
:maxdepth: 2
|
|
:numbered:
|
|
|
|
+ af_xdp_cni
|
|
lm_bond_virtio_sriov
|
|
lm_virtio_vhost_user
|
|
flow_bifurcation
|
|
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
|
|
index 38b9d36ab5..02c874d51e 100644
|
|
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
|
|
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
|
|
@@ -7,6 +7,7 @@
|
|
#include <string.h>
|
|
#include <netinet/in.h>
|
|
#include <net/if.h>
|
|
+#include <sys/un.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/ioctl.h>
|
|
#include <linux/if_ether.h>
|
|
@@ -82,6 +83,23 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, NOTICE);
|
|
|
|
#define ETH_AF_XDP_MP_KEY "afxdp_mp_send_fds"
|
|
|
|
+/* Buffer and array sizes used by the CNI Unix Domain Socket (UDS) client */
+#define MAX_LONG_OPT_SZ		64	/* hostname buffer size */
+#define UDS_MAX_FD_NUM		2	/* descriptor slots in struct msg_internal */
+#define UDS_MAX_CMD_LEN		64	/* request buffer size */
+#define UDS_MAX_CMD_RESP	128	/* response buffer size */
+
+/* Path of the UDS the client connects to */
+#define UDS_SOCK		"/tmp/afxdp.sock"
+
+/* Message strings exchanged over the UDS (each macro is defined exactly once) */
+#define UDS_CONNECT_MSG		"/connect"
+#define UDS_HOST_OK_MSG		"/host_ok"
+#define UDS_HOST_NAK_MSG	"/host_nak"
+#define UDS_VERSION_MSG		"/version"
+#define UDS_XSK_MAP_FD_MSG	"/xsk_map_fd"
+#define UDS_XSK_SOCKET_MSG	"/xsk_socket"
+#define UDS_FD_ACK_MSG		"/fd_ack"
+#define UDS_FD_NAK_MSG		"/fd_nak"
+#define UDS_FIN_MSG		"/fin"
+#define UDS_FIN_ACK_MSG		"/fin_ack"
|
|
+
|
|
static int afxdp_dev_count;
|
|
|
|
/* Message header to synchronize fds via IPC */
|
|
@@ -152,6 +170,7 @@ struct pmd_internals {
|
|
char prog_path[PATH_MAX];
|
|
bool custom_prog_configured;
|
|
bool force_copy;
|
|
+ bool use_cni;
|
|
struct bpf_map *map;
|
|
|
|
struct rte_ether_addr eth_addr;
|
|
@@ -171,6 +190,7 @@ struct pmd_process_private {
|
|
#define ETH_AF_XDP_PROG_ARG "xdp_prog"
|
|
#define ETH_AF_XDP_BUDGET_ARG "busy_budget"
|
|
#define ETH_AF_XDP_FORCE_COPY_ARG "force_copy"
|
|
+#define ETH_AF_XDP_USE_CNI_ARG "use_cni"
|
|
|
|
static const char * const valid_arguments[] = {
|
|
ETH_AF_XDP_IFACE_ARG,
|
|
@@ -180,6 +200,7 @@ static const char * const valid_arguments[] = {
|
|
ETH_AF_XDP_PROG_ARG,
|
|
ETH_AF_XDP_BUDGET_ARG,
|
|
ETH_AF_XDP_FORCE_COPY_ARG,
|
|
+ ETH_AF_XDP_USE_CNI_ARG,
|
|
NULL
|
|
};
|
|
|
|
@@ -1130,7 +1151,8 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals,
|
|
ret = xsk_umem__create(&umem->umem, base_addr, umem_size,
|
|
&rxq->fq, &rxq->cq, &usr_config);
|
|
if (ret) {
|
|
- AF_XDP_LOG(ERR, "Failed to create umem\n");
|
|
+ AF_XDP_LOG(ERR, "Failed to create umem [%d]: [%s]\n",
|
|
+ errno, strerror(errno));
|
|
goto err;
|
|
}
|
|
umem->buffer = base_addr;
|
|
@@ -1315,6 +1337,245 @@ configure_preferred_busy_poll(struct pkt_rx_queue *rxq)
|
|
return 0;
|
|
}
|
|
|
|
+/*
+ * Create a SOCK_SEQPACKET Unix Domain Socket and connect it to UDS_SOCK.
+ *
+ * @param server
+ *   Address structure; filled in with AF_UNIX and the UDS_SOCK path.
+ * @return
+ *   The connected socket descriptor on success, -1 on failure.
+ */
+static int
+init_uds_sock(struct sockaddr_un *server)
+{
+	int sock;
+	int saved_errno;
+
+	sock = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+	if (sock < 0) {
+		AF_XDP_LOG(ERR, "Failed to opening stream socket\n");
+		return -1;
+	}
+
+	server->sun_family = AF_UNIX;
+	strlcpy(server->sun_path, UDS_SOCK, sizeof(server->sun_path));
+
+	if (connect(sock, (struct sockaddr *)server, sizeof(*server)) < 0) {
+		/* Save errno first: close() may overwrite it before it is logged. */
+		saved_errno = errno;
+		close(sock);
+		AF_XDP_LOG(ERR, "Error connecting stream socket errno = [%d]: [%s]\n",
+			saved_errno, strerror(saved_errno));
+		return -1;
+	}
+
+	return sock;
+}
|
|
+
|
|
+/*
+ * Response text plus file descriptor bookkeeping for a UDS message.
+ * NOTE(review): nothing visible in this patch references this struct; the
+ * per-field notes are inferred from the names - confirm before relying on them.
+ */
+struct msg_internal {
+	char response[UDS_MAX_CMD_RESP];	/* text buffer of UDS_MAX_CMD_RESP bytes */
+	int len_param;				/* presumably a parameter length - TODO confirm */
+	int num_fds;				/* presumably the number of used fds[] slots */
+	int fds[UDS_MAX_FD_NUM];		/* room for UDS_MAX_FD_NUM descriptors */
+};
|
|
+
|
|
+/*
+ * Send a request string together with one file descriptor over the UDS.
+ *
+ * The descriptor is attached as SCM_RIGHTS ancillary data. sendmsg() is
+ * retried while it fails with EINTR.
+ *
+ * @param sock
+ *   Socket descriptor to send on.
+ * @param request
+ *   NUL-terminated request; strlen(request) bytes are sent.
+ * @param fd
+ *   Pointer to the file descriptor to pass.
+ * @return
+ *   The sendmsg() result: bytes sent, or a negative value on error.
+ */
+static int
+send_msg(int sock, char *request, int *fd)
+{
+	int snd;
+	struct iovec iov;
+	struct msghdr msgh;
+	struct cmsghdr *cmsg;
+	struct sockaddr_un dst;
+	/* The union forces struct cmsghdr alignment on the ancillary buffer. */
+	union {
+		char buf[CMSG_SPACE(sizeof(*fd))];
+		struct cmsghdr align;
+	} control;
+
+	memset(&dst, 0, sizeof(dst));
+	dst.sun_family = AF_UNIX;
+	strlcpy(dst.sun_path, UDS_SOCK, sizeof(dst.sun_path));
+
+	/* Initialize message header structure */
+	memset(&msgh, 0, sizeof(msgh));
+	memset(&control, 0, sizeof(control));
+	iov.iov_base = request;
+	iov.iov_len = strlen(request);
+
+	msgh.msg_name = &dst;
+	msgh.msg_namelen = sizeof(dst);
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+	msgh.msg_control = control.buf;
+	msgh.msg_controllen = sizeof(control.buf);
+
+	/* Attach the descriptor as SCM_RIGHTS ancillary data. */
+	cmsg = CMSG_FIRSTHDR(&msgh);
+	cmsg->cmsg_len = CMSG_LEN(sizeof(*fd));
+	cmsg->cmsg_level = SOL_SOCKET;
+	cmsg->cmsg_type = SCM_RIGHTS;
+	memcpy(CMSG_DATA(cmsg), fd, sizeof(*fd));
+
+	/* Send the request message. */
+	do {
+		snd = sendmsg(sock, &msgh, 0);
+	} while (snd < 0 && errno == EINTR);
+
+	return snd;
+}
|
|
+
|
|
+/*
+ * Receive one message from the UDS and extract a passed file descriptor.
+ *
+ * @param sock
+ *   Socket descriptor to read from.
+ * @param response
+ *   Output buffer of at least UDS_MAX_CMD_RESP bytes. At most
+ *   UDS_MAX_CMD_RESP - 1 bytes are received so the NUL terminator
+ *   appended on success always fits.
+ * @param s
+ *   Address structure handed to recvmsg() as msg_name.
+ * @param fd
+ *   Set to the first SCM_RIGHTS descriptor found in the message; left
+ *   untouched when the message carries none.
+ * @return
+ *   Message length on success, 0 if the socket was closed, -1 on error.
+ */
+static int
+read_msg(int sock, char *response, struct sockaddr_un *s, int *fd)
+{
+	int msglen;
+	struct msghdr msgh;
+	struct iovec iov;
+	/* The union forces struct cmsghdr alignment on the ancillary buffer. */
+	union {
+		char buf[CMSG_SPACE(sizeof(*fd))];
+		struct cmsghdr align;
+	} control;
+	struct cmsghdr *cmsg;
+
+	/* Initialize message header structure */
+	memset(&msgh, 0, sizeof(msgh));
+	memset(&control, 0, sizeof(control));
+	iov.iov_base = response;
+	/* Keep one byte free for the terminator written after the receive. */
+	iov.iov_len = UDS_MAX_CMD_RESP - 1;
+
+	msgh.msg_name = s;
+	msgh.msg_namelen = sizeof(*s);
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+	msgh.msg_control = control.buf;
+	msgh.msg_controllen = sizeof(control.buf);
+
+	/* Retry on EINTR, matching the send path. */
+	do {
+		msglen = recvmsg(sock, &msgh, 0);
+	} while (msglen < 0 && errno == EINTR);
+
+	/* zero length message means socket was closed */
+	if (msglen == 0)
+		return 0;
+
+	if (msglen < 0) {
+		AF_XDP_LOG(ERR, "recvmsg failed, %s\n", strerror(errno));
+		return -1;
+	}
+
+	/* read auxiliary FDs if any */
+	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
+			cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
+		if (cmsg->cmsg_level == SOL_SOCKET &&
+				cmsg->cmsg_type == SCM_RIGHTS) {
+			memcpy(fd, CMSG_DATA(cmsg), sizeof(*fd));
+			break;
+		}
+	}
+
+	response[msglen] = '\0';
+	return msglen;
+}
|
|
+
|
|
+/*
+ * Send one request to the CNI plugin and wait for its response.
+ *
+ * @param sock
+ *   Connected UDS socket descriptor.
+ * @param server
+ *   Address structure passed through to read_msg().
+ * @param request
+ *   NUL-terminated request string.
+ * @param req_fd
+ *   Descriptor to send with the request, or NULL to send only the text.
+ * @param response
+ *   Buffer that receives the NUL-terminated response.
+ * @param out_fd
+ *   Receives a descriptor carried by the response, if any.
+ * @return
+ *   0 on success, -1 if sending fails or no response is read.
+ */
+static int
+make_request_cni(int sock, struct sockaddr_un *server, char *request,
+		 int *req_fd, char *response, int *out_fd)
+{
+	int rval;
+
+	AF_XDP_LOG(DEBUG, "Request: [%s]\n", request);
+
+	/* if no file descriptor to send then directly write to socket.
+	 * else use sendmsg() to send the file descriptor.
+	 */
+	if (req_fd == NULL)
+		rval = write(sock, request, strlen(request));
+	else
+		rval = send_msg(sock, request, req_fd);
+
+	if (rval < 0) {
+		AF_XDP_LOG(ERR, "Write error %s\n", strerror(errno));
+		return -1;
+	}
+
+	rval = read_msg(sock, response, server, out_fd);
+	if (rval <= 0) {
+		AF_XDP_LOG(ERR, "Read error %d\n", rval);
+		return -1;
+	}
+	/* Log what was actually received, not the request that was sent. */
+	AF_XDP_LOG(DEBUG, "Response: [%s]\n", response);
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Compare the first `size` bytes of a response with the expected string.
+ *
+ * @return
+ *   The strncmp() result: 0 on a match, non-zero (negative or positive)
+ *   otherwise.
+ */
+static int
+check_response(char *response, char *exp_resp, long size)
+{
+	const int diff = strncmp(response, exp_resp, size);
+
+	return diff;
+}
|
|
+
|
|
+/*
+ * Obtain the XSKMAP file descriptor for a netdev from the AF_XDP CNI plugin.
+ *
+ * Handshake performed over the UDS_SOCK socket:
+ *   /connect,<hostname>    -> expects /host_ok
+ *   /version               -> response is not validated
+ *   /xsk_map_fd,<if_name>  -> expects /fd_ack carrying a descriptor
+ *   /fin                   -> expects /fin_ack
+ *
+ * @param if_name
+ *   Name of the network interface whose XSKMAP descriptor is requested.
+ * @return
+ *   The received descriptor on success, -1 on any failure. On failure a
+ *   descriptor that was already received is closed.
+ */
+static int
+get_cni_fd(char *if_name)
+{
+	char request[UDS_MAX_CMD_LEN], response[UDS_MAX_CMD_RESP];
+	char hostname[MAX_LONG_OPT_SZ], exp_resp[UDS_MAX_CMD_RESP];
+	struct sockaddr_un server;
+	/* Start at -1 so a reply without a descriptor is not taken for fd 0. */
+	int xsk_map_fd = -1, out_fd = -1;
+	int sock, err;
+
+	err = gethostname(hostname, MAX_LONG_OPT_SZ - 1);
+	if (err)
+		return -1;
+	/* gethostname() may leave a truncated name without a terminator. */
+	hostname[MAX_LONG_OPT_SZ - 1] = '\0';
+
+	memset(&server, 0, sizeof(server));
+	sock = init_uds_sock(&server);
+	if (sock < 0)
+		return -1;
+
+	/* Initiates handshake to CNI send: /connect,hostname */
+	snprintf(request, sizeof(request), "%s,%s", UDS_CONNECT_MSG, hostname);
+	memset(response, 0, sizeof(response));
+	if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) {
+		AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request);
+		goto err_close;
+	}
+
+	/* Expect /host_ok; any non-zero comparison result is a mismatch. */
+	strlcpy(exp_resp, UDS_HOST_OK_MSG, sizeof(exp_resp));
+	if (check_response(response, exp_resp, strlen(exp_resp)) != 0) {
+		AF_XDP_LOG(ERR, "Unexpected response [%s]\n", response);
+		goto err_close;
+	}
+
+	/* Request for "/version" */
+	strlcpy(request, UDS_VERSION_MSG, sizeof(request));
+	memset(response, 0, sizeof(response));
+	if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) {
+		AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request);
+		goto err_close;
+	}
+
+	/* Request for file descriptor for netdev name */
+	snprintf(request, sizeof(request), "%s,%s", UDS_XSK_MAP_FD_MSG, if_name);
+	memset(response, 0, sizeof(response));
+	out_fd = -1;
+	if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) {
+		AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request);
+		goto err_close;
+	}
+
+	/* Take ownership now so every later error path closes the descriptor. */
+	xsk_map_fd = out_fd;
+
+	/* Expect fd_ack with file descriptor */
+	strlcpy(exp_resp, UDS_FD_ACK_MSG, sizeof(exp_resp));
+	if (check_response(response, exp_resp, strlen(exp_resp)) != 0) {
+		AF_XDP_LOG(ERR, "Unexpected response [%s]\n", response);
+		goto err_close;
+	}
+
+	if (xsk_map_fd < 0) {
+		AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request);
+		goto err_close;
+	}
+
+	/* Initiate close connection */
+	strlcpy(request, UDS_FIN_MSG, sizeof(request));
+	memset(response, 0, sizeof(response));
+	if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) {
+		AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request);
+		goto err_close;
+	}
+
+	/* Connection close */
+	strlcpy(exp_resp, UDS_FIN_ACK_MSG, sizeof(exp_resp));
+	if (check_response(response, exp_resp, strlen(exp_resp)) != 0) {
+		AF_XDP_LOG(ERR, "Unexpected response [%s]\n", response);
+		goto err_close;
+	}
+	close(sock);
+
+	return xsk_map_fd;
+
+err_close:
+	if (xsk_map_fd >= 0)
+		close(xsk_map_fd);
+	close(sock);
+	return -1;
+}
|
|
+
|
|
static int
|
|
xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
|
|
int ring_size)
|
|
@@ -1363,6 +1624,10 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
|
|
cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
|
|
#endif
|
|
|
|
+ /* Disable libbpf from loading XDP program */
|
|
+ if (internals->use_cni)
|
|
+ cfg.libbpf_flags |= XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
|
|
+
|
|
if (strnlen(internals->prog_path, PATH_MAX)) {
|
|
if (!internals->custom_prog_configured) {
|
|
ret = load_custom_xdp_prog(internals->prog_path,
|
|
@@ -1414,7 +1679,23 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
|
|
}
|
|
}
|
|
|
|
- if (rxq->busy_budget) {
|
|
+ if (internals->use_cni) {
|
|
+ int err, fd, map_fd;
|
|
+
|
|
+ /* get socket fd from CNI plugin */
|
|
+ map_fd = get_cni_fd(internals->if_name);
|
|
+ if (map_fd < 0) {
|
|
+ AF_XDP_LOG(ERR, "Failed to receive CNI plugin fd\n");
|
|
+ goto out_xsk;
|
|
+ }
|
|
+ /* get socket fd */
|
|
+ fd = xsk_socket__fd(rxq->xsk);
|
|
+ err = bpf_map_update_elem(map_fd, &rxq->xsk_queue_idx, &fd, 0);
|
|
+ if (err) {
|
|
+ AF_XDP_LOG(ERR, "Failed to insert unprivileged xsk in map.\n");
|
|
+ goto out_xsk;
|
|
+ }
|
|
+ } else if (rxq->busy_budget) {
|
|
ret = configure_preferred_busy_poll(rxq);
|
|
if (ret) {
|
|
AF_XDP_LOG(ERR, "Failed configure busy polling.\n");
|
|
@@ -1585,6 +1866,27 @@ static const struct eth_dev_ops ops = {
|
|
.get_monitor_addr = eth_get_monitor_addr,
|
|
};
|
|
|
|
+/*
+ * Device operations used when the "use_cni" option is set.
+ * The CNI option runs in an unprivileged container environment where
+ * ethernet device functionality is reduced, so a customized eth_dev_ops
+ * table is needed: the promiscuous enable and disable callbacks are
+ * left out.
+ */
+static const struct eth_dev_ops ops_cni = {
+	/* device life cycle */
+	.dev_configure = eth_dev_configure,
+	.dev_start = eth_dev_start,
+	.dev_stop = eth_dev_stop,
+	.dev_close = eth_dev_close,
+	/* device information and link */
+	.dev_infos_get = eth_dev_info,
+	.mtu_set = eth_dev_mtu_set,
+	.link_update = eth_link_update,
+	/* queues */
+	.rx_queue_setup = eth_rx_queue_setup,
+	.tx_queue_setup = eth_tx_queue_setup,
+	.get_monitor_addr = eth_get_monitor_addr,
+	/* statistics */
+	.stats_get = eth_stats_get,
+	.stats_reset = eth_stats_reset,
+};
|
|
+
|
|
/** parse busy_budget argument */
|
|
static int
|
|
parse_budget_arg(const char *key __rte_unused,
|
|
@@ -1705,8 +2007,8 @@ xdp_get_channels_info(const char *if_name, int *max_queues,
|
|
|
|
static int
|
|
parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
|
|
- int *queue_cnt, int *shared_umem, char *prog_path,
|
|
- int *busy_budget, int *force_copy)
|
|
+ int *queue_cnt, int *shared_umem, char *prog_path,
|
|
+ int *busy_budget, int *force_copy, int *use_cni)
|
|
{
|
|
int ret;
|
|
|
|
@@ -1747,6 +2049,11 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
|
|
if (ret < 0)
|
|
goto free_kvlist;
|
|
|
|
+ ret = rte_kvargs_process(kvlist, ETH_AF_XDP_USE_CNI_ARG,
|
|
+ &parse_integer_arg, use_cni);
|
|
+ if (ret < 0)
|
|
+ goto free_kvlist;
|
|
+
|
|
free_kvlist:
|
|
rte_kvargs_free(kvlist);
|
|
return ret;
|
|
@@ -1784,8 +2091,9 @@ get_iface_info(const char *if_name,
|
|
|
|
static struct rte_eth_dev *
|
|
init_internals(struct rte_vdev_device *dev, const char *if_name,
|
|
- int start_queue_idx, int queue_cnt, int shared_umem,
|
|
- const char *prog_path, int busy_budget, int force_copy)
|
|
+ int start_queue_idx, int queue_cnt, int shared_umem,
|
|
+ const char *prog_path, int busy_budget, int force_copy,
|
|
+ int use_cni)
|
|
{
|
|
const char *name = rte_vdev_device_name(dev);
|
|
const unsigned int numa_node = dev->device.numa_node;
|
|
@@ -1814,6 +2122,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
|
|
#endif
|
|
internals->shared_umem = shared_umem;
|
|
internals->force_copy = force_copy;
|
|
+ internals->use_cni = use_cni;
|
|
|
|
if (xdp_get_channels_info(if_name, &internals->max_queue_cnt,
|
|
&internals->combined_queue_cnt)) {
|
|
@@ -1872,7 +2181,11 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
|
|
eth_dev->data->dev_link = pmd_link;
|
|
eth_dev->data->mac_addrs = &internals->eth_addr;
|
|
eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
|
|
- eth_dev->dev_ops = &ops;
|
|
+ if (!internals->use_cni)
|
|
+ eth_dev->dev_ops = &ops;
|
|
+ else
|
|
+ eth_dev->dev_ops = &ops_cni;
|
|
+
|
|
eth_dev->rx_pkt_burst = eth_af_xdp_rx;
|
|
eth_dev->tx_pkt_burst = eth_af_xdp_tx;
|
|
eth_dev->process_private = process_private;
|
|
@@ -1999,6 +2312,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
|
|
char prog_path[PATH_MAX] = {'\0'};
|
|
int busy_budget = -1, ret;
|
|
int force_copy = 0;
|
|
+ int use_cni = 0;
|
|
struct rte_eth_dev *eth_dev = NULL;
|
|
const char *name = rte_vdev_device_name(dev);
|
|
|
|
@@ -2041,11 +2355,23 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
|
|
|
|
if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
|
|
&xsk_queue_cnt, &shared_umem, prog_path,
|
|
- &busy_budget, &force_copy) < 0) {
|
|
+ &busy_budget, &force_copy, &use_cni) < 0) {
|
|
AF_XDP_LOG(ERR, "Invalid kvargs value\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
+ if (use_cni && busy_budget > 0) {
|
|
+ AF_XDP_LOG(ERR, "When '%s' parameter is used, '%s' parameter is not valid\n",
|
|
+ ETH_AF_XDP_USE_CNI_ARG, ETH_AF_XDP_BUDGET_ARG);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ if (use_cni && strnlen(prog_path, PATH_MAX)) {
|
|
+ AF_XDP_LOG(ERR, "When '%s' parameter is used, '%s' parameter is not valid\n",
|
|
+ ETH_AF_XDP_USE_CNI_ARG, ETH_AF_XDP_PROG_ARG);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
if (strlen(if_name) == 0) {
|
|
AF_XDP_LOG(ERR, "Network interface must be specified\n");
|
|
return -EINVAL;
|
|
@@ -2068,8 +2394,8 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
|
|
busy_budget;
|
|
|
|
eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
|
|
- xsk_queue_cnt, shared_umem, prog_path,
|
|
- busy_budget, force_copy);
|
|
+ xsk_queue_cnt, shared_umem, prog_path,
|
|
+ busy_budget, force_copy, use_cni);
|
|
if (eth_dev == NULL) {
|
|
AF_XDP_LOG(ERR, "Failed to init internals\n");
|
|
return -1;
|
|
@@ -2129,4 +2455,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
|
|
"shared_umem=<int> "
|
|
"xdp_prog=<string> "
|
|
"busy_budget=<int> "
|
|
- "force_copy=<int> ");
|
|
+ "force_copy=<int> "
|
|
+ "use_cni=<int> ");
|
|
--
|
|
2.33.0
|
|
|