!793 backport upstream patches to fix systemd-shutdown hanging forever when fsync is blocked by a missing DM mapping table

From: @zhang-yao-2022 
Reviewed-by: @protkhn, @xujing99 
Signed-off-by: @xujing99
This commit is contained in:
openeuler-ci-bot 2024-12-11 01:30:35 +00:00 committed by Gitee
commit 9f8ae498fb
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
5 changed files with 413 additions and 2 deletions

View File

@ -0,0 +1,159 @@
From 758760a3610e3c6674de8a1d51b12b991eafef7c Mon Sep 17 00:00:00 2001
From: Mike Yuan <me@yhndnzj.com>
Date: Wed, 5 Jun 2024 17:53:27 +0200
Subject: [PATCH] shutdown: clean up sync_with_progress a bit
Also, ignore the error on caller's side.
Conflict:context adaptation in main
Reference:https://github.com/systemd/systemd/commit/758760a3610e3c6674de8a1d51b12b991eafef7c
---
src/shutdown/shutdown.c | 68 ++++++++++++++++++-----------------------
1 file changed, 30 insertions(+), 38 deletions(-)
diff --git a/src/shutdown/shutdown.c b/src/shutdown/shutdown.c
index a98cfc4..a03a126 100644
--- a/src/shutdown/shutdown.c
+++ b/src/shutdown/shutdown.c
@@ -20,6 +20,7 @@
#include "cgroup-setup.h"
#include "cgroup-util.h"
#include "def.h"
+#include "errno-util.h"
#include "exec-util.h"
#include "fd-util.h"
#include "fileio.h"
@@ -186,7 +187,9 @@ static int switch_root_initramfs(void) {
static int sync_making_progress(unsigned long long *prev_dirty) {
_cleanup_fclose_ FILE *f = NULL;
unsigned long long val = 0;
- int ret;
+ int r;
+
+ assert(prev_dirty);
f = fopen("/proc/meminfo", "re");
if (!f)
@@ -194,13 +197,12 @@ static int sync_making_progress(unsigned long long *prev_dirty) {
for (;;) {
_cleanup_free_ char *line = NULL;
- unsigned long long ull = 0;
- int q;
+ unsigned long long ull;
- q = read_line(f, LONG_LINE_MAX, &line);
- if (q < 0)
- return log_warning_errno(q, "Failed to parse /proc/meminfo: %m");
- if (q == 0)
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse /proc/meminfo: %m");
+ if (r == 0)
break;
if (!first_word(line, "NFS_Unstable:") && !first_word(line, "Writeback:") && !first_word(line, "Dirty:"))
@@ -208,25 +210,20 @@ static int sync_making_progress(unsigned long long *prev_dirty) {
errno = 0;
if (sscanf(line, "%*s %llu %*s", &ull) != 1) {
- if (errno != 0)
- log_warning_errno(errno, "Failed to parse /proc/meminfo: %m");
- else
- log_warning("Failed to parse /proc/meminfo");
-
+ log_warning_errno(errno_or_else(EIO), "Failed to parse /proc/meminfo field, ignoring: %m");
return false;
}
val += ull;
}
- ret = *prev_dirty > val;
+ r = *prev_dirty > val;
*prev_dirty = val;
- return ret;
+ return r;
}
-static void sync_with_progress(void) {
+static int sync_with_progress(void) {
unsigned long long dirty = ULLONG_MAX;
- unsigned checks;
pid_t pid;
int r;
@@ -236,37 +233,32 @@ static void sync_with_progress(void) {
* the progress. If the timeout lapses, the assumption is that the particular sync stalled. */
r = asynchronous_sync(&pid);
- if (r < 0) {
- log_error_errno(r, "Failed to fork sync(): %m");
- return;
- }
+ if (r < 0)
+ return log_error_errno(r, "Failed to fork sync(): %m");
log_info("Syncing filesystems and block devices.");
/* Start monitoring the sync operation. If more than
* SYNC_PROGRESS_ATTEMPTS lapse without progress being made,
* we assume that the sync is stalled */
- for (checks = 0; checks < SYNC_PROGRESS_ATTEMPTS; checks++) {
+ for (unsigned checks = 0; checks < SYNC_PROGRESS_ATTEMPTS; checks++) {
r = wait_for_terminate_with_timeout(pid, SYNC_TIMEOUT_USEC);
if (r == 0)
- /* Sync finished without error.
- * (The sync itself does not return an error code) */
- return;
- else if (r == -ETIMEDOUT) {
- /* Reset the check counter if the "Dirty" value is
- * decreasing */
- if (sync_making_progress(&dirty) > 0)
- checks = 0;
- } else {
- log_error_errno(r, "Failed to sync filesystems and block devices: %m");
- return;
- }
+ /* Sync finished without error (sync() call itself does not return an error code) */
+ return 0;
+ if (r != -ETIMEDOUT)
+ return log_error_errno(r, "Failed to sync filesystems and block devices: %m");
+
+ /* Reset the check counter if we made some progress */
+ if (sync_making_progress(&dirty) > 0)
+ checks = 0;
}
- /* Only reached in the event of a timeout. We should issue a kill
- * to the stray process. */
- log_error("Syncing filesystems and block devices - timed out, issuing SIGKILL to PID "PID_FMT".", pid);
+ /* Only reached in the event of a timeout. We should issue a kill to the stray process. */
(void) kill(pid, SIGKILL);
+ return log_error_errno(SYNTHETIC_ERRNO(ETIMEDOUT),
+ "Syncing filesystems and block devices - timed out, issuing SIGKILL to PID "PID_FMT".",
+ pid);
}
static int read_current_sysctl_printk_log_level(void) {
@@ -387,7 +379,7 @@ int main(int argc, char *argv[]) {
* desperately trying to sync IO to disk within their timeout. Do not remove this sync, data corruption will
* result. */
if (!in_container)
- sync_with_progress();
+ (void) sync_with_progress();
disable_coredumps();
disable_binfmt();
@@ -556,7 +548,7 @@ int main(int argc, char *argv[]) {
* sync'ed things already once above, but we did some more work since then which might have caused IO, hence
* let's do it once more. Do not remove this sync, data corruption will result. */
if (!in_container)
- sync_with_progress();
+ (void) sync_with_progress();
if (streq(arg_verb, "exit")) {
if (in_container) {
--
2.33.0

View File

@ -0,0 +1,78 @@
From b4b66b26620bfaf5818c95d5cffafd85207694e7 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 9 Sep 2024 17:53:03 +0200
Subject: [PATCH] shutdown: replace unbounded fsync() with bounded sync_with_progress()
Let's put a time-out on this syncing.
Inspired-by: #34289 #34283
Conflict:1.The code of detach-loopback.c/detach-md.c/detach-dm.c all
lives in the umount.c file in systemd 249.
2.context adaptation: RET_NERRNO is a new macro that only makes the return
more concise, with no functional difference. Also, we only need to include
shutdown.h once in umount.c.
Reference:https://github.com/systemd/systemd/pull/34330/commits/b4b66b26620bfaf5818c95d5cffafd85207694e7
---
src/shutdown/umount.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/src/shutdown/umount.c b/src/shutdown/umount.c
index 9325870..0d5bc7e 100644
--- a/src/shutdown/umount.c
+++ b/src/shutdown/umount.c
@@ -38,6 +38,7 @@
#include "signal-util.h"
#include "string-util.h"
#include "strv.h"
+#include "shutdown.h"
#include "umount.h"
#include "util.h"
#include "virt.h"
@@ -411,8 +412,7 @@ static int delete_loopback(const char *device) {
/* Loopback block devices don't sync in-flight blocks when we clear the fd, hence sync explicitly
* first */
- if (fsync(fd) < 0)
- log_debug_errno(errno, "Failed to sync loop block device %s, ignoring: %m", device);
+ (void) sync_with_progress(fd);
if (ioctl(fd, LOOP_CLR_FD, 0) < 0) {
if (errno == ENXIO) /* Nothing bound, didn't do anything */
@@ -474,7 +474,6 @@ static int delete_loopback(const char *device) {
static int delete_dm(MountPoint *m) {
_cleanup_close_ int fd = -1;
- int r;
assert(m);
assert(major(m->devnum) != 0);
@@ -484,9 +483,11 @@ static int delete_dm(MountPoint *m) {
if (fd < 0)
return -errno;
- r = fsync_path_at(AT_FDCWD, m->path);
- if (r < 0)
- log_debug_errno(r, "Failed to sync DM block device %s, ignoring: %m", m->path);
+ _cleanup_close_ int block_fd = open(m->path, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+ if (block_fd < 0)
+ log_debug_errno(errno, "Failed to open DM block device %s for syncing, ignoring: %m", m->path);
+ else
+ (void) sync_with_progress(block_fd);
if (ioctl(fd, DM_DEV_REMOVE, &(struct dm_ioctl) {
.version = {
@@ -513,8 +514,7 @@ static int delete_md(MountPoint *m) {
if (fd < 0)
return -errno;
- if (fsync(fd) < 0)
- log_debug_errno(errno, "Failed to sync MD block device %s, ignoring: %m", m->path);
+ (void) sync_with_progress(fd);
if (ioctl(fd, STOP_ARRAY, NULL) < 0)
return -errno;
--
2.19.1

View File

@ -0,0 +1,168 @@
From 13b5225d6278af15e84ebd1889f04cfe81b47787 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 9 Sep 2024 17:49:33 +0200
Subject: [PATCH] shutdown: teach sync_with_progress() to optionally sync a specific fd only
This is preparation for reusing the logic for syncing DM and other
devices with a timeout applied.
Conflict:1.drop the stdio_fds parameter of safe_fork_full, as
asynchronous_fsync does not need that functionality and NULL is passed
for it anyway;
2.do not use the FORK_DETACH flag, as the parameter ret_pid is the
address of the variable pid in the calling function sync_with_progress
and so can't be NULL. Introducing the FORK_DETACH flag would require
backporting several more patches, so we just adapt the code here.
3.context adaptation.
Reference:https://github.com/systemd/systemd/pull/34330/commits/13b5225d6278af15e84ebd1889f04cfe81b47787
---
src/basic/async.c | 21 +++++++++++++++++++++
src/basic/async.h | 1 +
src/shutdown/shutdown.c | 31 ++++++++++++++++++++++---------
src/shutdown/shutdown.h | 4 ++++
4 files changed, 48 insertions(+), 9 deletions(-)
create mode 100644 src/shutdown/shutdown.h
diff --git a/src/basic/async.c b/src/basic/async.c
index 443cfa9..eb6f0d8 100644
--- a/src/basic/async.c
+++ b/src/basic/async.c
@@ -80,6 +80,27 @@ int asynchronous_sync(pid_t *ret_pid) {
return 0;
}
+int asynchronous_fsync(int fd, pid_t *ret_pid) {
+ int r;
+
+ assert(fd >= 0);
+ /* Same as asynchronous_sync() above, but calls fsync() on a specific fd */
+
+ r = safe_fork_full("(sd-fsync)",
+ /* except_fds= */ &fd,
+ /* n_except_fds= */ 1,
+ FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS, ret_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child process */
+ fsync(fd);
+ _exit(EXIT_SUCCESS);
+ }
+
+ return 0;
+}
+
static void *close_thread(void *p) {
(void) pthread_setname_np(pthread_self(), "close");
diff --git a/src/basic/async.h b/src/basic/async.h
index e0bbaa5..24f2629 100644
--- a/src/basic/async.h
+++ b/src/basic/async.h
@@ -8,6 +8,7 @@
int asynchronous_job(void* (*func)(void *p), void *arg);
int asynchronous_sync(pid_t *ret_pid);
+int asynchronous_fsync(int fd, pid_t *ret_pid);
int asynchronous_close(int fd);
DEFINE_TRIVIAL_CLEANUP_FUNC(int, asynchronous_close);
diff --git a/src/shutdown/shutdown.c b/src/shutdown/shutdown.c
index 96f0dbd..c03ab43 100644
--- a/src/shutdown/shutdown.c
+++ b/src/shutdown/shutdown.c
@@ -29,6 +29,7 @@
#include "process-util.h"
#include "reboot-util.h"
#include "rlimit-util.h"
+#include "shutdown.h"
#include "signal-util.h"
#include "string-util.h"
#include "switch-root.h"
@@ -221,8 +222,10 @@ static int sync_making_progress(unsigned long long *prev_dirty) {
return r;
}
-static int sync_with_progress(void) {
+int sync_with_progress(int fd) {
unsigned long long dirty = ULLONG_MAX;
+ _cleanup_free_ char *path = NULL;
+ const char *what;
pid_t pid;
int r;
@@ -231,11 +234,20 @@ static int sync_with_progress(void) {
/* Due to the possibility of the sync operation hanging, we fork a child process and monitor
* the progress. If the timeout lapses, the assumption is that the particular sync stalled. */
- r = asynchronous_sync(&pid);
- if (r < 0)
- return log_error_errno(r, "Failed to fork sync(): %m");
+ if (fd >= 0) {
+ r = asynchronous_fsync(fd, &pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to fork fsync(): %m");
+
+ (void) fd_get_path(fd, &path);
+ } else {
+ r = asynchronous_sync(&pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to fork sync(): %m");
+ }
- log_info("Syncing filesystems and block devices.");
+ what = path ?: "filesystems and block devices";
+ log_info("Syncing %s.", what);
/* Start monitoring the sync operation. If more than
* SYNC_PROGRESS_ATTEMPTS lapse without progress being made,
@@ -246,7 +258,7 @@ static int sync_with_progress(void) {
/* Sync finished without error (sync() call itself does not return an error code) */
return 0;
if (r != -ETIMEDOUT)
- return log_error_errno(r, "Failed to sync filesystems and block devices: %m");
+ return log_error_errno(r, "Failed to sync %s: %m", what);
/* Reset the check counter if we made some progress */
if (sync_making_progress(&dirty) > 0)
@@ -256,7 +268,8 @@ static int sync_with_progress(void) {
/* Only reached in the event of a timeout. We should issue a kill to the stray process. */
(void) kill(pid, SIGKILL);
return log_error_errno(SYNTHETIC_ERRNO(ETIMEDOUT),
- "Syncing filesystems and block devices - timed out, issuing SIGKILL to PID "PID_FMT".",
+ "Syncing %s - timed out, issuing SIGKILL to PID "PID_FMT".",
+ what,
pid);
}
@@ -378,7 +391,7 @@ int main(int argc, char *argv[]) {
* desperately trying to sync IO to disk within their timeout. Do not remove this sync, data corruption will
* result. */
if (!in_container)
- (void) sync_with_progress();
+ (void) sync_with_progress(-EBADF);
disable_coredumps();
disable_binfmt();
@@ -547,7 +560,7 @@ int main(int argc, char *argv[]) {
* sync'ed things already once above, but we did some more work since then which might have caused IO, hence
* let's do it once more. Do not remove this sync, data corruption will result. */
if (!in_container)
- (void) sync_with_progress();
+ (void) sync_with_progress(-EBADF);
if (streq(arg_verb, "exit")) {
if (in_container) {
diff --git a/src/shutdown/shutdown.h b/src/shutdown/shutdown.h
new file mode 100644
index 0000000..99aaec6
--- /dev/null
+++ b/src/shutdown/shutdown.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int sync_with_progress(int fd);
--
2.19.1

View File

@ -28,7 +28,7 @@ index 680de4f..066a03a 100644
+ broadcast_signal(SIGKILL, true, false, arg_timeout);
+
+ if (!in_container)
+ sync_with_progress();
+ (void) sync_with_progress(-EBADF);
+
+ log_info("Rebooting now.");
+ (void) reboot(RB_AUTOBOOT);

View File

@ -25,7 +25,7 @@
Name: systemd
Url: https://systemd.io/
Version: 249
Release: 95
Release: 96
License: MIT and LGPLv2+ and GPLv2+
Summary: System and Service Manager
@ -699,6 +699,9 @@ Patch6646: backport-psi-util-fix-error-handling.patch
Patch6647: backport-pid1-cgroup-show-ignore-EOPNOTSUPP-in-cg_read_pid.patch
Patch6648: backport-cgroup-util-introduce-cg_is_threaded.patch
Patch6649: backport-core-execute-warn-when-threaded-mode-is-detected.patch
Patch6650: backport-shutdown-clean-up-sync_with_progress-a-bit.patch
Patch6651: backport-shutdown-teach-sync_with_progress-to-optionally-sync.patch
Patch6652: backport-shutdown-replace-unbounded-fsync-with-bounded-sync_w.patch
Patch9001: update-rtc-with-system-clock-when-shutdown.patch
Patch9002: udev-add-actions-while-rename-netif-failed.patch
@ -2213,6 +2216,9 @@ grep -q -E '^KEYMAP="?fi-latin[19]"?' /etc/vconsole.conf 2>/dev/null &&
/usr/bin/systemd-cryptenroll
%changelog
* Tue Dec 10 2024 zhangyao <zhangyao108@huawei.com> - 249-96
- backport upstream patches to fix systemd-shutdown hanging forever when fsync is blocked by a missing DM mapping table
* Mon Dec 09 2024 zhangyao <zhangyao108@huawei.com> - 249-95
- add backport-cgroup-util-introduce-cg_is_threaded.patch
backport-core-execute-warn-when-threaded-mode-is-detected.patch