!98 [sync] PR-93: strace: fix potential deadlock during cleanup

From: @openeuler-sync-bot 
Reviewed-by: @SuperSix173 
Signed-off-by: @SuperSix173
This commit is contained in:
openeuler-ci-bot 2024-12-27 10:45:29 +00:00 committed by Gitee
commit f2baaa2674
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
6 changed files with 575 additions and 1 deletions

View File

@ -0,0 +1,91 @@
From 651640fd5faf0e24d9cd88c596854c108f732329 Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@strace.io>
Date: Fri, 24 Nov 2023 08:00:00 +0000
Subject: [PATCH] Move print_debug_info() definition before cleanup()
* src/strace.c (print_debug_info): Move before cleanup().
Reference: https://github.com/strace/strace/commit/651640fd5faf0e24d9cd88c596854c108f732329
Conflict: Since the current version has not merged commits 462fa296 and d0e007f6,
based on the current version, move print_debug_info() definition before cleanup().
---
src/strace.c | 56 ++++++++++++++++++++++++++--------------------------
1 file changed, 28 insertions(+), 28 deletions(-)
diff --git a/src/strace.c b/src/strace.c
index d3fa44f30..28f5b4368 100644
--- a/src/strace.c
+++ b/src/strace.c
@@ -2779,34 +2779,6 @@ pid2tcb(const int pid)
}
static void
-cleanup(int fatal_sig)
-{
- unsigned int i;
- struct tcb *tcp;
-
- if (!fatal_sig)
- fatal_sig = SIGTERM;
-
- for (i = 0; i < tcbtabsize; i++) {
- tcp = tcbtab[i];
- if (!tcp->pid)
- continue;
- debug_func_msg("looking at pid %u", tcp->pid);
- if (tcp->pid == strace_child) {
- kill(tcp->pid, SIGCONT);
- kill(tcp->pid, fatal_sig);
- }
- detach(tcp);
- }
-}
-
-static void
-interrupt(int sig)
-{
- interrupted = sig;
-}
-
-static void
print_debug_info(const int pid, int status)
{
const unsigned int event = (unsigned int) status >> 16;
@@ -2845,6 +2817,34 @@ print_debug_info(const int pid, int stat
error_msg("[wait(0x%06x) = %u] %s%s", status, pid, buf, evbuf);
}
+static void
+cleanup(int fatal_sig)
+{
+ unsigned int i;
+ struct tcb *tcp;
+
+ if (!fatal_sig)
+ fatal_sig = SIGTERM;
+
+ for (i = 0; i < tcbtabsize; i++) {
+ tcp = tcbtab[i];
+ if (!tcp->pid)
+ continue;
+ debug_func_msg("looking at pid %u", tcp->pid);
+ if (tcp->pid == strace_child) {
+ kill(tcp->pid, SIGCONT);
+ kill(tcp->pid, fatal_sig);
+ }
+ detach(tcp);
+ }
+}
+
+static void
+interrupt(int sig)
+{
+ interrupted = sig;
+}
+
static struct tcb *
maybe_allocate_tcb(const int pid, int status)
{
--
2.33.0

View File

@ -0,0 +1,51 @@
From 83a2af722ce2c2b38f3d6d9f4114015521449a2a Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@strace.io>
Date: Sat, 25 Nov 2023 08:00:00 +0000
Subject: [PATCH] Factor out droptcb_verbose() from detach()
* src/strace.c (droptcb_verbose): New function.
(detach): Use it instead of droptcb.
Reference: https://github.com/strace/strace/commit/83a2af722ce2c2b38f3d6d9f4114015521449a2a
Conflict: NA
---
src/strace.c | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/src/strace.c b/src/strace.c
index 28f5b4368..351842792 100644
--- a/src/strace.c
+++ b/src/strace.c
@@ -1148,6 +1148,16 @@ droptcb(struct tcb *tcp)
memset(tcp, 0, sizeof(*tcp));
}
+static void
+droptcb_verbose(struct tcb *tcp)
+{
+ if (!is_number_in_set(QUIET_ATTACH, quiet_set)
+ && (tcp->flags & TCB_ATTACHED))
+ error_msg("Process %u detached", tcp->pid);
+
+ droptcb(tcp);
+}
+
/* Detach traced process.
* Never call DETACH twice on the same process as both unattached and
* attached-unstopped processes give the same ESRCH. For unattached process we
@@ -1302,11 +1312,7 @@ detach(struct tcb *tcp)
}
drop:
- if (!is_number_in_set(QUIET_ATTACH, quiet_set)
- && (tcp->flags & TCB_ATTACHED))
- error_msg("Process %u detached", tcp->pid);
-
- droptcb(tcp);
+ droptcb_verbose(tcp);
}
static void
--
2.33.0

View File

@ -0,0 +1,127 @@
From 4c4c339585b523167e336d9b379fe4c00dc94dca Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@strace.io>
Date: Sun, 26 Nov 2023 08:00:00 +0000
Subject: [PATCH] Factor out interrupt_or_stop() from detach()
* src/strace.c (interrupt_or_stop): New function.
(detach): Use it.
Reference: https://github.com/strace/strace/commit/4c4c339585b523167e336d9b379fe4c00dc94dca
Conflict: NA
---
src/strace.c | 49 ++++++++++++++++++++++++++++---------------------
1 file changed, 28 insertions(+), 21 deletions(-)
diff --git a/src/strace.c b/src/strace.c
index 351842792..385cbaa72 100644
--- a/src/strace.c
+++ b/src/strace.c
@@ -1158,17 +1158,10 @@ droptcb_verbose(struct tcb *tcp)
droptcb(tcp);
}
-/* Detach traced process.
- * Never call DETACH twice on the same process as both unattached and
- * attached-unstopped processes give the same ESRCH. For unattached process we
- * would SIGSTOP it and wait for its SIGSTOP notification forever.
- */
-static void
-detach(struct tcb *tcp)
+/* Returns true when the tracee has to be waited for. */
+static bool
+interrupt_or_stop(struct tcb *tcp)
{
- int error;
- int status;
-
/*
* Linux wrongly insists the child be stopped
* before detaching. Arghh. We go through hoops
@@ -1176,24 +1169,24 @@ detach(struct tcb *tcp)
*/
if (!(tcp->flags & TCB_ATTACHED))
- goto drop;
+ return false;
/* We attached but possibly didn't see the expected SIGSTOP.
* We must catch exactly one as otherwise the detached process
* would be left stopped (process state T).
*/
if (tcp->flags & TCB_IGNORE_ONE_SIGSTOP)
- goto wait_loop;
+ return true;
- error = ptrace(PTRACE_DETACH, tcp->pid, 0, 0);
+ int error = ptrace(PTRACE_DETACH, tcp->pid, 0, 0);
if (!error) {
/* On a clear day, you can see forever. */
- goto drop;
+ return false;
}
if (errno != ESRCH) {
/* Shouldn't happen. */
perror_func_msg("ptrace(PTRACE_DETACH,%u)", tcp->pid);
- goto drop;
+ return false;
}
/* ESRCH: process is either not stopped or doesn't exist. */
if (my_tkill(tcp->pid, 0) < 0) {
@@ -1201,7 +1194,7 @@ detach(struct tcb *tcp)
/* Shouldn't happen. */
perror_func_msg("tkill(%u,0)", tcp->pid);
/* else: process doesn't exist. */
- goto drop;
+ return false;
}
/* Process is not stopped, need to stop it. */
if (use_seize) {
@@ -1213,26 +1206,40 @@ detach(struct tcb *tcp)
*/
error = ptrace(PTRACE_INTERRUPT, tcp->pid, 0, 0);
if (!error)
- goto wait_loop;
+ return true;
if (errno != ESRCH)
perror_func_msg("ptrace(PTRACE_INTERRUPT,%u)", tcp->pid);
} else {
error = my_tkill(tcp->pid, SIGSTOP);
if (!error)
- goto wait_loop;
+ return true;
if (errno != ESRCH)
perror_func_msg("tkill(%u,SIGSTOP)", tcp->pid);
}
+
/* Either process doesn't exist, or some weird error. */
- goto drop;
+ return false;
+}
- wait_loop:
- /* We end up here in three cases:
+/* Detach traced process.
+ * Never call DETACH twice on the same process as both unattached and
+ * attached-unstopped processes give the same ESRCH. For unattached process we
+ * would SIGSTOP it and wait for its SIGSTOP notification forever.
+ */
+static void
+detach(struct tcb *tcp)
+{
+ if (!interrupt_or_stop(tcp))
+ goto drop;
+
+ /*
+ * We end up here in three cases:
* 1. We sent PTRACE_INTERRUPT (use_seize case)
* 2. We sent SIGSTOP (!use_seize)
* 3. Attach SIGSTOP was already pending (TCB_IGNORE_ONE_SIGSTOP set)
*/
for (;;) {
+ int status;
unsigned int sig;
if (waitpid(tcp->pid, &status, __WALL) < 0) {
if (errno == EINTR)
--
2.33.0

View File

@ -0,0 +1,179 @@
From d275fc0312255ddffa878b70da34d945b4fd8212 Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@strace.io>
Date: Mon, 27 Nov 2023 08:00:00 +0000
Subject: [PATCH] Factor out detach_interrupted_or_stopped() from detach()
* src/strace.c (detach_interrupted_or_stopped): New function.
(detach): Use it.
Reference: https://github.com/strace/strace/commit/d275fc0312255ddffa878b70da34d945b4fd8212
Conflict: NA
---
src/strace.c | 136 +++++++++++++++++++++++++++------------------------
1 file changed, 72 insertions(+), 64 deletions(-)
diff --git a/src/strace.c b/src/strace.c
index 385cbaa72..ef3360b95 100644
--- a/src/strace.c
+++ b/src/strace.c
@@ -1221,6 +1221,77 @@ interrupt_or_stop(struct tcb *tcp)
return false;
}
+/* Returns true if the tracee can be passed to droptcb. */
+static bool
+detach_interrupted_or_stopped(struct tcb *tcp, int status)
+{
+ if (!WIFSTOPPED(status)) {
+ /*
+ * Tracee exited or was killed by signal.
+ * We shouldn't normally reach this place:
+ * we don't want to consume exit status.
+ * Consider "strace -p PID" being ^C-ed:
+ * we want merely to detach from PID.
+ *
+ * However, we _can_ end up here if tracee
+ * was SIGKILLed.
+ */
+ return true;
+ }
+ unsigned int sig = WSTOPSIG(status);
+ debug_msg("detach wait: event:%d sig:%d",
+ (unsigned) status >> 16, sig);
+ if (use_seize) {
+ unsigned event = (unsigned)status >> 16;
+ if (event == PTRACE_EVENT_STOP /*&& sig == SIGTRAP*/) {
+ /*
+ * sig == SIGTRAP: PTRACE_INTERRUPT stop.
+ * sig == other: process was already stopped
+ * with this stopping sig (see tests/detach-stopped).
+ * Looks like re-injecting this sig is not necessary
+ * in DETACH for the tracee to remain stopped.
+ */
+ sig = 0;
+ }
+ /*
+ * PTRACE_INTERRUPT is not guaranteed to produce
+ * the above event if other ptrace-stop is pending.
+ * See tests/detach-sleeping testcase:
+ * strace got SIGINT while tracee is sleeping.
+ * We sent PTRACE_INTERRUPT.
+ * We see syscall exit, not PTRACE_INTERRUPT stop.
+ * We won't get PTRACE_INTERRUPT stop
+ * if we would CONT now. Need to DETACH.
+ */
+ if (sig == syscall_trap_sig)
+ sig = 0;
+ /* else: not sure in which case we can be here.
+ * Signal stop? Inject it while detaching.
+ */
+ ptrace_restart(PTRACE_DETACH, tcp, sig);
+ return true;
+ }
+ /* Note: this check has to be after use_seize check */
+ /* (else, in use_seize case SIGSTOP will be mistreated) */
+ if (sig == SIGSTOP) {
+ /* Detach, suppressing SIGSTOP */
+ ptrace_restart(PTRACE_DETACH, tcp, 0);
+ return true;
+ }
+ if (sig == syscall_trap_sig)
+ sig = 0;
+ /* Can't detach just yet, may need to wait for SIGSTOP */
+ int error = ptrace_restart(PTRACE_CONT, tcp, sig);
+ if (error < 0) {
+ /* Should not happen.
+ * Note: ptrace_restart returns 0 on ESRCH, so it's not it.
+ * ptrace_restart already emitted error message.
+ */
+ return true;
+ }
+ return false;
+}
+
/* Detach traced process.
* Never call DETACH twice on the same process as both unattached and
* attached-unstopped processes give the same ESRCH. For unattached process we
@@ -1240,7 +1311,6 @@ detach(struct tcb *tcp)
*/
for (;;) {
int status;
- unsigned int sig;
if (waitpid(tcp->pid, &status, __WALL) < 0) {
if (errno == EINTR)
continue;
@@ -1252,70 +1322,8 @@ detach(struct tcb *tcp)
perror_func_msg("waitpid(%u)", tcp->pid);
break;
}
- if (!WIFSTOPPED(status)) {
- /*
- * Tracee exited or was killed by signal.
- * We shouldn't normally reach this place:
- * we don't want to consume exit status.
- * Consider "strace -p PID" being ^C-ed:
- * we want merely to detach from PID.
- *
- * However, we _can_ end up here if tracee
- * was SIGKILLed.
- */
- break;
- }
- sig = WSTOPSIG(status);
- debug_msg("detach wait: event:%d sig:%d",
- (unsigned) status >> 16, sig);
- if (use_seize) {
- unsigned event = (unsigned)status >> 16;
- if (event == PTRACE_EVENT_STOP /*&& sig == SIGTRAP*/) {
- /*
- * sig == SIGTRAP: PTRACE_INTERRUPT stop.
- * sig == other: process was already stopped
- * with this stopping sig (see tests/detach-stopped).
- * Looks like re-injecting this sig is not necessary
- * in DETACH for the tracee to remain stopped.
- */
- sig = 0;
- }
- /*
- * PTRACE_INTERRUPT is not guaranteed to produce
- * the above event if other ptrace-stop is pending.
- * See tests/detach-sleeping testcase:
- * strace got SIGINT while tracee is sleeping.
- * We sent PTRACE_INTERRUPT.
- * We see syscall exit, not PTRACE_INTERRUPT stop.
- * We won't get PTRACE_INTERRUPT stop
- * if we would CONT now. Need to DETACH.
- */
- if (sig == syscall_trap_sig)
- sig = 0;
- /* else: not sure in which case we can be here.
- * Signal stop? Inject it while detaching.
- */
- ptrace_restart(PTRACE_DETACH, tcp, sig);
- break;
- }
- /* Note: this check has to be after use_seize check */
- /* (else, in use_seize case SIGSTOP will be mistreated) */
- if (sig == SIGSTOP) {
- /* Detach, suppressing SIGSTOP */
- ptrace_restart(PTRACE_DETACH, tcp, 0);
+ if (detach_interrupted_or_stopped(tcp, status))
break;
- }
- if (sig == syscall_trap_sig)
- sig = 0;
- /* Can't detach just yet, may need to wait for SIGSTOP */
- error = ptrace_restart(PTRACE_CONT, tcp, sig);
- if (error < 0) {
- /* Should not happen.
- * Note: ptrace_restart returns 0 on ESRCH, so it's not it.
- * ptrace_restart already emitted error message.
- */
- break;
- }
}
drop:
--
2.33.0

View File

@ -0,0 +1,118 @@
From 3cfa90b54367168cc1e473af004675f12816e955 Mon Sep 17 00:00:00 2001
From: donghaobo <donghaobo@kuaishou.com>
Date: Fri, 17 Nov 2023 13:55:22 +0800
Subject: [PATCH] strace: fix potential deadlock during cleanup
strace -f can potentially deadlock during cleanup if the tracee
is using vfork or CLONE_VFORK to spawn threads.
On linux, calling vfork will cause the calling thread to 'D' state
until the child process calls execve or exit. Therefore, strace
should detach the child process first, otherwise it can wait
indefinitely for the calling thread in 'D' state.
Reproducer:
/*
* Start tracing with strace -f,
* then press Ctrl-C within 9 seconds to interrupt.
*/
#include <stdlib.h>
#include <unistd.h>
#include <sys/wait.h>
int main(void)
{
pid_t pid = vfork();
if (pid < 0)
return 1;
if (pid) {
int status;
waitpid(pid, &status, 0);
return 0;
}
sleep(9);
_exit(0);
}
* src/strace.c (cleanup): Do not call detach() for each tracee
one by one as it can deadlock, instead call interrupt_or_stop()
for each tracee and after that enter a wait loop calling
detach_interrupted_or_stopped() for each tracee as soon as
they become ready.
* NEWS: Mention this fix.
Co-authored-by: Dmitry V. Levin <ldv@strace.io>
Reference: https://github.com/strace/strace/commit/3cfa90b54367168cc1e473af004675f12816e955
Conflict: Remove NEWS, src/strace.c adapt patch context
---
NEWS | 2 ++
src/strace.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/src/strace.c b/src/strace.c
index ef3360b95..04b3c1dfe 100644
--- a/src/strace.c
+++ b/src/strace.c
@@ -2847,6 +2847,8 @@ cleanup(int fatal_sig)
if (!fatal_sig)
fatal_sig = SIGTERM;
+ size_t num_to_wait = 0;
+
for (i = 0; i < tcbtabsize; i++) {
tcp = tcbtab[i];
if (!tcp->pid)
@@ -3177,7 +3179,48 @@ cleanup(int fatal_sig)
kill(tcp->pid, SIGCONT);
kill(tcp->pid, fatal_sig);
}
- detach(tcp);
+ if (interrupt_or_stop(tcp))
+ ++num_to_wait;
+ else
+ droptcb_verbose(tcp);
+ }
+
+ while (num_to_wait) {
+ int status;
+ pid_t pid = waitpid(-1, &status, __WALL);
+
+ if (pid < 0) {
+ if (errno == EINTR)
+ continue;
+ /* ECHILD is not expected */
+ perror_func_msg("waitpid(-1, __WALL)");
+ break;
+ }
+
+ if (pid == popen_pid) {
+ if (!WIFSTOPPED(status))
+ popen_pid = 0;
+ continue;
+ }
+
+ if (debug_flag)
+ print_debug_info(pid, status);
+
+ struct tcb *tcp = pid2tcb(pid);
+ if (!tcp) {
+ if (!is_number_in_set(QUIET_EXIT, quiet_set)) {
+ /*
+ * This can happen if we inherited an unknown child.
+ */
+ error_msg("Exit of unknown pid %u ignored", pid);
+ }
+ continue;
+ }
+
+ if (detach_interrupted_or_stopped(tcp, status)) {
+ droptcb_verbose(tcp);
+ --num_to_wait;
+ }
}
}
--
2.33.0

View File

@ -3,7 +3,7 @@
Summary: Tracks and displays system calls associated with a running process
Name: strace
Version: 5.14
Release: 5
Release: 6
# The test suite is GPLv2+, all the rest is LGPLv2.1+.
License: LGPL-2.1+ and GPL-2.0+
# Some distros require Group tag to be present,
@ -18,6 +18,11 @@ Patch1: strace-5.14-sw.patch
%endif
Patch2: strace-5.14-loongarch64.patch
Patch3: strace-5.14-solve-ilp32-strace-build-error.patch
Patch4: backport-0001-Move-print_debug_info-definition-before-cleanup.patch
Patch5: backport-0002-Factor-out-droptcb_verbose-from-detach.patch
Patch6: backport-0003-Factor-out-interrupt_or_stop-from-detach.patch
Patch7: backport-0004-Factor-out-detach_interrupted_or_stopped-from-detach.patch
Patch8: backport-0005-strace-fix-potential-deadlock-during-cleanup.patch
BuildRequires: gcc gzip
# Install Bluetooth headers for AF_BLUETOOTH sockets decoding.
@ -94,6 +99,9 @@ wait
%{_mandir}/man1/*
%changelog
* Fri Dec 27 2024 wangxiao <wangxiao184@h-partners.com> - 5.14-6
- strace: fix potential deadlock during cleanup
* Wed Feb 28 2024 liuchao <liuchao173@huawei.com> - 5.14-5
- remove redundant judgments