dpu-utilities/0001-fix-suspend-in-fifo-the-reason-is-SIGURG-recieved-in.patch

309 lines
9.4 KiB
Diff
Raw Normal View History

From b6663e01b364c878e08206a4ccb3ea02a63d7ed0 Mon Sep 17 00:00:00 2001
From: liqiang <liqiang64@huawei.com>
Date: Thu, 7 Dec 2023 16:58:09 +0800
Subject: [PATCH 1/3] fix suspend in fifo, the reason is SIGURG received in
client thread when waiting for ack
Signed-off-by: liqiang <liqiang64@huawei.com>
---
qtfs/include/log.h | 9 ++++---
qtfs/qtfs/fifo.c | 21 +++++++++++++----
qtfs/qtfs/qtfs-mod.c | 12 +++++-----
qtfs/qtfs_common/conn.c | 43 +++++++++++++++++++++++++++-------
qtfs/qtfs_server/server_fifo.c | 21 +++++++++++------
5 files changed, 77 insertions(+), 29 deletions(-)
diff --git a/qtfs/include/log.h b/qtfs/include/log.h
index 4df42b3..284464e 100644
--- a/qtfs/include/log.h
+++ b/qtfs/include/log.h
@@ -27,8 +27,11 @@ enum level {
#ifndef __KERNEL__
#include <time.h>
#define true 1
+#ifndef log_switch
+#define log_switch 1
+#endif
#define log_info(info, ...) \
- if (true) {\
+ if (log_switch) {\
time_t t; \
struct tm p; \
time(&t); \
@@ -39,7 +42,7 @@ enum level {
}
#define log_warn(info, ...) \
- if (true) {\
+ if (log_switch) {\
time_t t; \
struct tm p; \
time(&t); \
@@ -50,7 +53,7 @@ enum level {
}
#define log_err(info, ...) \
- if (true) {\
+ if (log_switch) {\
time_t t; \
struct tm p; \
time(&t); \
diff --git a/qtfs/qtfs/fifo.c b/qtfs/qtfs/fifo.c
index 2b636bb..55292de 100644
--- a/qtfs/qtfs/fifo.c
+++ b/qtfs/qtfs/fifo.c
@@ -117,6 +117,10 @@ ssize_t qtfs_fifo_readiter(struct kiocb *kio, struct iov_iter *iov)
int total = 0;
int ret;
+ if (sigismember(&current->pending.signal, SIGURG)) {
+ qtfs_err("signal SIGURG return eintr");
+ return -EINTR;
+ }
if (pvar == NULL || !virt_addr_valid(pvar)) {
qtfs_err("invalid fifo read req, private data is invalid");
return -EFAULT;
@@ -127,8 +131,9 @@ ssize_t qtfs_fifo_readiter(struct kiocb *kio, struct iov_iter *iov)
qtfs_info("fifo readiter len:%llu", req->len);
rsp = qtfs_remote_run(pvar, QTFS_REQ_READITER, sizeof(struct qtreq_fifo_read));
if (IS_ERR_OR_NULL(rsp) || rsp->err != 0) {
- qtfs_err("remote run failed. or errno:%d", (rsp == NULL) ? -1 : rsp->err);
- return -EFAULT;
+ qtfs_err("remote run failed. or errno:%d", IS_ERR_OR_NULL(rsp) ? -1 : rsp->err);
+ //return -EFAULT;
+ return (rsp == NULL) ? -EFAULT : (ssize_t)rsp;
}
while (total < rsp->len) {
@@ -154,6 +159,10 @@ ssize_t qtfs_fifo_writeiter(struct kiocb *kio, struct iov_iter *iov)
struct qtreq_fifo_write *req;
struct qtrsp_fifo_write *rsp;
+ if (sigismember(&current->pending.signal, SIGURG)) {
+ qtfs_err("signal SIGURG return eintr");
+ return -EINTR;
+ }
if (pvar == NULL || !virt_addr_valid(pvar)) {
qtfs_err("invalid fifo write req, private data is invalid");
return -EFAULT;
@@ -164,9 +173,10 @@ ssize_t qtfs_fifo_writeiter(struct kiocb *kio, struct iov_iter *iov)
pvar->iov_send = iov;
rsp = qtfs_remote_run(pvar, QTFS_REQ_WRITE, sizeof(struct qtreq_fifo_write));
if (IS_ERR_OR_NULL(rsp) || rsp->err != 0) {
- qtfs_err("fifo write remote run failed, or errno:%d", (rsp == NULL) ? -1 : rsp->err);
- return -EFAULT;
+ qtfs_err("fifo write remote run failed, or errno:%d", IS_ERR_OR_NULL(rsp) ? -1 : rsp->err);
+ return (rsp == NULL) ? -EFAULT : (ssize_t)rsp;
}
+ qtfs_info("fifo write over err:%d len:%llu", rsp->err, rsp->len);
return rsp->len;
}
@@ -179,11 +189,12 @@ int qtfs_fifo_release(struct inode *inode, struct file *file)
qtfs_err("invalid fifo write req, private data is invalid");
return -EFAULT;
}
- pvar->vec_recv.iov_len = QTFS_MSG_HEAD_LEN;
+ pvar->vec_recv.iov_len = pvar->recv_max;
rsp = qtfs_remote_run(pvar, QTFS_REQ_CLOSE, 0);
if (IS_ERR_OR_NULL(rsp)) {
qtfs_err("fifo close failed");
}
+ qtfs_info("fifo release req over");
qtfs_fifo_put_file(file);
return 0;
}
diff --git a/qtfs/qtfs/qtfs-mod.c b/qtfs/qtfs/qtfs-mod.c
index ad0bfc5..c4ef72c 100644
--- a/qtfs/qtfs/qtfs-mod.c
+++ b/qtfs/qtfs/qtfs-mod.c
@@ -66,7 +66,7 @@ void *qtfs_remote_run(struct qtfs_conn_var_s *pvar, unsigned int type, unsigned
if (ret <= 0) {
qtfs_err("qtfs remote run send failed, ret:%d pvar sendlen:%lu.", ret, pvar->vec_send.iov_len);
qtinfo_senderrinc(req->type);
- return NULL;
+ return (void *)ret;
}
qtinfo_sendinc(type);
@@ -87,7 +87,7 @@ retry:
}
// miss message retry would result in block
if (retrytimes >= 5) {
- qtfs_err("qtfs conn recv get mismatch seq_num too many times, stop retrying");
+ qtfs_err("qtfs conn recv get mismatch seq_num too many times, stop retrying, signal:0x%lx", (unsigned long)current->pending.signal.sig[0]);
return NULL;
}
retrytimes++;
@@ -98,9 +98,9 @@ retry:
qtinfo_cntinc(QTINF_RESTART_SYS);
qtinfo_recverrinc(req->type);
}
- if (retrytimes >= 5) {
- qtfs_err("qtfs conn recv get retry signal(%d) too many times, stop retrying", ret);
- return NULL;
+ if (retrytimes >= 5 && ret == -EINTR) {
+ qtfs_err("qtfs conn recv get retry signal(%d) too many times, stop retrying, signal:0x%lx", ret, (unsigned long)current->pending.signal.sig[0]);
+ return (void *)ret;
}
retrytimes++;
msleep(1);
@@ -109,7 +109,7 @@ retry:
if (ret < 0) {
qtfs_err("qtfs remote run error, req_type:%u, ret:%d.", req->type, ret);
qtinfo_recverrinc(req->type);
- return NULL;
+ return (void *)ret;
}
if (retrytimes > 0)
qtfs_debug("qtfs remote run retry times:%lu.", retrytimes);
diff --git a/qtfs/qtfs_common/conn.c b/qtfs/qtfs_common/conn.c
index 99809d2..4f2f048 100644
--- a/qtfs/qtfs_common/conn.c
+++ b/qtfs/qtfs_common/conn.c
@@ -274,6 +274,29 @@ int qtfs_conn_send(struct qtfs_conn_var_s *pvar)
return ret + iov_ret;
}
+#ifdef QTFS_CLIENT
+static int qtfs_mismatch_fix(struct qtfs_conn_var_s *pvar)
+{
+ struct qtreq *req = (struct qtreq *)pvar->vec_send.iov_base;
+ struct qtreq *rsp = (struct qtreq *)pvar->vec_recv.iov_base;
+ int ret;
+ size_t len;
+ if (req->seq_num == rsp->seq_num)
+ return 0;
+
+ qtfs_err("recv mismatch package, req type:%u rsp type:%u req seq:%lu rsp seq:%lu",
+ req->type, rsp->type, req->seq_num, rsp->seq_num);
+ // If a mismatched packet arrives, only the standard qtreq header has been read so far;
+ // just discard the following rsp->len bytes from the socket buffer.
+ len = rsp->len;
+ ret = pvar->conn_ops->conn_recv(&pvar->conn_var, pvar->vec_recv.iov_base, len, true);
+ if (ret != len) {
+ qtfs_err("mismatch drop failed, recv len:%lu ret:%d", len, ret);
+ }
+ return -1;
+}
+#endif
+
int do_qtfs_conn_recv(struct qtfs_conn_var_s *pvar, bool block)
{
int ret = 0;
@@ -284,8 +307,10 @@ int do_qtfs_conn_recv(struct qtfs_conn_var_s *pvar, bool block)
int msglen = 0;
void *addr = NULL;
int leftlen = 0;
- int totallen = 0;
+#ifdef QTFS_CLIENT
+start:
+#endif
headlen = pvar->conn_ops->conn_recv(&pvar->conn_var, pvar->vec_recv.iov_base, QTFS_MSG_HEAD_LEN, block);
if (headlen <= 0) {
return headlen;
@@ -294,6 +319,12 @@ int do_qtfs_conn_recv(struct qtfs_conn_var_s *pvar, bool block)
qtfs_err("qtfs recv headlen not valid, expect(%ld), get(%d)", QTFS_MSG_HEAD_LEN, headlen);
return headlen;
}
+#ifdef QTFS_CLIENT
+ if (qtfs_mismatch_fix(pvar) != 0) {
+ qtfs_err("mismatch package recved");
+ goto start;
+ }
+#endif
load.iov_base = pvar->vec_recv.iov_base + QTFS_MSG_HEAD_LEN;
load.iov_len = pvar->vec_recv.iov_len - QTFS_MSG_HEAD_LEN;
@@ -318,7 +349,7 @@ retry:
qtinfo_recverrinc(rsp->type);
}
#endif
- if (retrytimes >= 5) {
+ if (retrytimes >= 5 && ret == -EINTR) {
qtfs_err("qtfs recv get retry signal(%d) too many times, stop retrying", ret);
return ret;
}
@@ -331,12 +362,8 @@ retry:
return ret;
}
- totallen += ret;
- if (totallen < msglen) {
- qtfs_err("qtfs conn recv %d msg, expect %d, goto retry", ret, msglen);
- addr += ret;
- leftlen -= ret;
- goto retry;
+ if (ret < msglen) {
+ qtfs_err("qtfs conn recv %d msg, expect %d", ret, msglen);
}
if (ret > rsp->len) {
diff --git a/qtfs/qtfs_server/server_fifo.c b/qtfs/qtfs_server/server_fifo.c
index e1d9bd1..fc4c67c 100644
--- a/qtfs/qtfs_server/server_fifo.c
+++ b/qtfs/qtfs_server/server_fifo.c
@@ -21,6 +21,7 @@
#include <sys/epoll.h>
#include <linux/vm_sockets.h>
+int log_switch = 0;
#include "req.h"
#include "log.h"
#include "libsocket.h"
@@ -227,22 +228,26 @@ static void fifo_proc_ack(struct fifo_event_t *evt, int type, int sockfd, char *
{
int ret;
struct qtreq rsp;
+ char *msg = (char *)malloc(sizeof(rsp) + arglen);
+ if (msg == NULL) {
+ log_err("malloc failed:%d.", errno);
+ return;
+ }
rsp.type = type;
rsp.err = 0;
rsp.seq_num = evt->seq_num;
rsp.len = arglen;
- ret = write(sockfd, &rsp, sizeof(struct qtreq));
+ memcpy(msg, &rsp, sizeof(rsp));
+ memcpy(&msg[sizeof(rsp)], arg, arglen);
+
+ ret = write(sockfd, msg, sizeof(struct qtreq) + arglen);
+ free(msg);
if (ret < 0) {
log_err("fifo ack type:%d failed, sockfd:%d err:%d", type, sockfd, errno);
return;
}
- ret = write(sockfd, arg, arglen);
- if (ret < 0) {
- log_err("fifo ack arg type:%d failed, sockfd:%d err:%d", type, sockfd, errno);
- return;
- }
log_info("Type:%d ack successed, sockfd:%d.", type, sockfd);
return;
}
@@ -639,6 +644,8 @@ int fifo_proc_main_sock(struct fifo_event_t *evt)
return FIFO_RET_OK;
}
+
+extern int engine_run;
void *fifo_server_main_thread(void *arg)
{
int indx = 0;
@@ -673,7 +680,7 @@ void *fifo_server_main_thread(void *arg)
fifo_add_event(sockfd, NULL, fifo_proc_main_sock, NULL, EPOLLIN);
- while (1) {
+ while (engine_run) {
int ret;
struct fifo_event_t *event;
int n = epoll_wait(epollfd, evts, EPOLL_MAX_EVENT_NUMS, 1000);
--
2.37.1 (Apple Git-137.1)