rasdaemon: Add some fix patches for rasdaemon
Patch#1: 0001-rasdaemon-ras-mc-ctl-Modify-check-for-HiSilicon-KunP.patch
Fix the "nimbus_id=0" field not being displayed when querying RAS error information.
Patch#2: 0001-Check-CPUs-online-not-configured.patch
Check CPUs online, not configured.
Signed-off-by: caijian <caijian11@h-partners.com>
This commit is contained in:
parent
dc7d3373de
commit
0a28133cc4
38
0001-Check-CPUs-online-not-configured.patch
Normal file
38
0001-Check-CPUs-online-not-configured.patch
Normal file
@ -0,0 +1,38 @@
|
||||
From f1ea76375281001cdf4a048c1a4a24d86c6fbe48 Mon Sep 17 00:00:00 2001
|
||||
From: Zeph / Liz Loss-Cutler-Hull <warp-spam_git@aehallh.com>
|
||||
Date: Sun, 9 Jul 2023 04:57:19 -0700
|
||||
Subject: [PATCH] Check CPUs online, not configured.
|
||||
|
||||
When the number of CPUs detected is greater than the number of CPUs in
|
||||
the system, rasdaemon will crash when it receives some events.
|
||||
|
||||
Looking deeper, we also fail to use the poll method for similar reasons
|
||||
in this case.
|
||||
|
||||
All of this can be prevented by checking to see how many CPUs are
|
||||
currently online (sysconf(_SC_NPROCESSORS_ONLN)) instead of how many
|
||||
CPUs the current kernel was configured to support
|
||||
(sysconf(_SC_NPROCESSORS_CONF)).
|
||||
|
||||
For the kernel side of the discussion, see https://lore.kernel.org/lkml/CAM6Wdxft33zLeeXHhmNX5jyJtfGTLiwkQSApc=10fqf+rQh9DA@mail.gmail.com/T/
|
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
|
||||
---
|
||||
ras-events.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/ras-events.c b/ras-events.c
|
||||
index a82dab2..5935163 100644
|
||||
--- a/ras-events.c
|
||||
+++ b/ras-events.c
|
||||
@@ -350,7 +350,7 @@ static void parse_ras_data(struct pthread_data *pdata, struct kbuffer *kbuf,
|
||||
|
||||
static int get_num_cpus(struct ras_events *ras)
|
||||
{
|
||||
- return sysconf(_SC_NPROCESSORS_CONF);
|
||||
+ return sysconf(_SC_NPROCESSORS_ONLN);
|
||||
#if 0
|
||||
char fname[MAX_PATH + 1];
|
||||
int num_cpus = 0;
|
||||
--
|
||||
2.25.1
|
||||
|
||||
122
0001-rasdaemon-ras-mc-ctl-Modify-check-for-HiSilicon-KunP.patch
Normal file
122
0001-rasdaemon-ras-mc-ctl-Modify-check-for-HiSilicon-KunP.patch
Normal file
@ -0,0 +1,122 @@
|
||||
From 3576ebb2e0e7badb475807058776de748bbc8c43 Mon Sep 17 00:00:00 2001
|
||||
From: Shiju Jose <shiju.jose@huawei.com>
|
||||
Date: Thu, 24 Aug 2023 13:07:17 +0100
|
||||
Subject: [PATCH] rasdaemon: ras-mc-ctl: Modify check for HiSilicon KunPeng9xx
|
||||
error fields
|
||||
|
||||
Modify check for valid HiSilicon KunPeng9xx error fields.
|
||||
Fixes an issue where error data is not printed when its value is 0.
|
||||
|
||||
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
|
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
|
||||
---
|
||||
util/ras-mc-ctl.in | 72 +++++++++++++++++++++++-----------------------
|
||||
1 file changed, 36 insertions(+), 36 deletions(-)
|
||||
|
||||
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
|
||||
index 4178dcf..07e6fca 100755
|
||||
--- a/util/ras-mc-ctl.in
|
||||
+++ b/util/ras-mc-ctl.in
|
||||
@@ -1672,13 +1672,13 @@ sub vendor_errors
|
||||
if ($module eq 0 || ($module_id && uc($module) eq uc($module_id))) {
|
||||
$out .= "$id. $timestamp Error Info: ";
|
||||
$out .= "version=$version, ";
|
||||
- $out .= "soc_id=$soc_id, " if ($soc_id);
|
||||
- $out .= "socket_id=$socket_id, " if ($socket_id);
|
||||
- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
|
||||
- $out .= "module_id=$module_id, " if ($module_id);
|
||||
- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
|
||||
- $out .= "err_severity=$err_severity, " if ($err_severity);
|
||||
- $out .= "Error Registers: $regs " if ($regs);
|
||||
+ $out .= "soc_id=$soc_id, " if (defined $soc_id && length $soc_id);
|
||||
+ $out .= "socket_id=$socket_id, " if (defined $socket_id && length $socket_id);
|
||||
+ $out .= "nimbus_id=$nimbus_id, " if (defined $nimbus_id && length $nimbus_id);
|
||||
+ $out .= "module_id=$module_id, " if (defined $module_id && length $module_id);
|
||||
+ $out .= "sub_module_id=$sub_module_id, " if (defined $sub_module_id && length $sub_module_id);
|
||||
+ $out .= "err_severity=$err_severity, " if (defined $err_severity && length $err_severity);
|
||||
+ $out .= "Error Registers: $regs " if (defined $regs && length $regs);
|
||||
$out .= "\n\n";
|
||||
$found_module = 1;
|
||||
}
|
||||
@@ -1697,13 +1697,13 @@ sub vendor_errors
|
||||
if ($module eq 0 || ($module_id && uc($module) eq uc($module_id))) {
|
||||
$out .= "$id. $timestamp Error Info: ";
|
||||
$out .= "version=$version, ";
|
||||
- $out .= "soc_id=$soc_id, " if ($soc_id);
|
||||
- $out .= "socket_id=$socket_id, " if ($socket_id);
|
||||
- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
|
||||
- $out .= "module_id=$module_id, " if ($module_id);
|
||||
- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
|
||||
- $out .= "err_severity=$err_severity, " if ($err_severity);
|
||||
- $out .= "Error Registers: $regs " if ($regs);
|
||||
+ $out .= "soc_id=$soc_id, " if (defined $soc_id && length $soc_id);
|
||||
+ $out .= "socket_id=$socket_id, " if (defined $socket_id && length $socket_id);
|
||||
+ $out .= "nimbus_id=$nimbus_id, " if (defined $nimbus_id && length $nimbus_id);
|
||||
+ $out .= "module_id=$module_id, " if (defined $module_id && length $module_id);
|
||||
+ $out .= "sub_module_id=$sub_module_id, " if (defined $sub_module_id && length $sub_module_id);
|
||||
+ $out .= "err_severity=$err_severity, " if (defined $err_severity && length $err_severity);
|
||||
+ $out .= "Error Registers: $regs " if (defined $regs && length $regs);
|
||||
$out .= "\n\n";
|
||||
$found_module = 1;
|
||||
}
|
||||
@@ -1722,15 +1722,15 @@ sub vendor_errors
|
||||
if ($module eq 0 || ($sub_module_id && uc($module) eq uc($sub_module_id))) {
|
||||
$out .= "$id. $timestamp Error Info: ";
|
||||
$out .= "version=$version, ";
|
||||
- $out .= "soc_id=$soc_id, " if ($soc_id);
|
||||
- $out .= "socket_id=$socket_id, " if ($socket_id);
|
||||
- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
|
||||
- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
|
||||
- $out .= "core_id=$core_id, " if ($core_id);
|
||||
- $out .= "port_id=$port_id, " if ($port_id);
|
||||
- $out .= "err_severity=$err_severity, " if ($err_severity);
|
||||
- $out .= "err_type=$err_type, " if ($err_type);
|
||||
- $out .= "Error Registers: $regs " if ($regs);
|
||||
+ $out .= "soc_id=$soc_id, " if (defined $soc_id && length $soc_id);
|
||||
+ $out .= "socket_id=$socket_id, " if (defined $socket_id && length $socket_id);
|
||||
+ $out .= "nimbus_id=$nimbus_id, " if (defined $nimbus_id && length $nimbus_id);
|
||||
+ $out .= "sub_module_id=$sub_module_id, " if (defined $sub_module_id && length $sub_module_id);
|
||||
+ $out .= "core_id=$core_id, " if (defined $core_id && length $core_id);
|
||||
+ $out .= "port_id=$port_id, " if (defined $port_id && length $port_id);
|
||||
+ $out .= "err_severity=$err_severity, " if (defined $err_severity && length $err_severity);
|
||||
+ $out .= "err_type=$err_type, " if (defined $err_type && length $err_type);
|
||||
+ $out .= "Error Registers: $regs " if (defined $regs && length $regs);
|
||||
$out .= "\n\n";
|
||||
$found_module = 1;
|
||||
}
|
||||
@@ -1749,19 +1749,19 @@ sub vendor_errors
|
||||
if ($module eq 0 || ($module_id && uc($module) eq uc($module_id))) {
|
||||
$out .= "$id. $timestamp Error Info: ";
|
||||
$out .= "version=$version, ";
|
||||
- $out .= "soc_id=$soc_id, " if ($soc_id);
|
||||
- $out .= "socket_id=$socket_id, " if ($socket_id);
|
||||
- $out .= "totem_id=$totem_id, " if ($totem_id);
|
||||
- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
|
||||
- $out .= "sub_system_id=$sub_system_id, " if ($sub_system_id);
|
||||
- $out .= "module_id=$module_id, " if ($module_id);
|
||||
- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
|
||||
- $out .= "core_id=$core_id, " if ($core_id);
|
||||
- $out .= "port_id=$port_id, " if ($port_id);
|
||||
- $out .= "err_type=$err_type, " if ($err_type);
|
||||
- $out .= "pcie_info=$pcie_info, " if ($pcie_info);
|
||||
- $out .= "err_severity=$err_severity, " if ($err_severity);
|
||||
- $out .= "Error Registers: $regs" if ($regs);
|
||||
+ $out .= "soc_id=$soc_id, " if (defined $soc_id && length $soc_id);
|
||||
+ $out .= "socket_id=$socket_id, " if (defined $socket_id && length $socket_id);
|
||||
+ $out .= "totem_id=$totem_id, " if (defined $totem_id && length $totem_id);
|
||||
+ $out .= "nimbus_id=$nimbus_id, " if (defined $nimbus_id && length $nimbus_id);
|
||||
+ $out .= "sub_system_id=$sub_system_id, " if (defined $sub_system_id && length $sub_system_id);
|
||||
+ $out .= "module_id=$module_id, " if (defined $module_id && length $module_id);
|
||||
+ $out .= "sub_module_id=$sub_module_id, " if (defined $sub_module_id && length $sub_module_id);
|
||||
+ $out .= "core_id=$core_id, " if (defined $core_id && length $core_id );
|
||||
+ $out .= "port_id=$port_id, " if (defined $port_id && length $port_id);
|
||||
+ $out .= "err_type=$err_type, " if (defined $err_type && length $err_type);
|
||||
+ $out .= "pcie_info=$pcie_info, " if (defined $pcie_info && length $pcie_info);
|
||||
+ $out .= "err_severity=$err_severity, " if (defined $err_severity && length $err_severity);
|
||||
+ $out .= "Error Registers: $regs" if (defined $regs && length $regs);
|
||||
$out .= "\n\n";
|
||||
$found_module = 1;
|
||||
}
|
||||
--
|
||||
2.25.1
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
Name: rasdaemon
|
||||
Version: 0.6.7
|
||||
Release: 14
|
||||
Release: 15
|
||||
License: GPLv2
|
||||
Summary: Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events
|
||||
URL: https://github.com/mchehab/rasdaemon.git
|
||||
@ -44,6 +44,8 @@ Patch6000: backport-rasdaemon-ras-mc-ctl-Fix-script-to-parse-dimm-sizes.patch
|
||||
Patch6001: backport-rasdaemon-ras-memory-failure-handler-handle-localtim.patch
|
||||
Patch6002: backport-rasdaemon-ras-report-fix-possible-but-unlikely-file-.patch
|
||||
Patch6003: backport-tools-lib-traceevent-Add-proper-KBUFFER_TYPE_TIME_ST.patch
|
||||
Patch6004: 0001-rasdaemon-ras-mc-ctl-Modify-check-for-HiSilicon-KunP.patch
|
||||
Patch6005: 0001-Check-CPUs-online-not-configured.patch
|
||||
|
||||
Patch9000: fix-ras-mc-ctl.service-startup-failed-when-selinux-is-no.patch
|
||||
Patch9001: 0001-rasdaemon-Fix-for-regression-in-ras_mc_create_table-.patch
|
||||
@ -104,6 +106,14 @@ fi
|
||||
/usr/bin/systemctl disable rasdaemon.service >/dev/null 2>&1 || :
|
||||
|
||||
%changelog
|
||||
* Wed Dec 6 2023 caijian <caijian11@h-partners.com> - 0.6.7-15
|
||||
- Type:bugfix
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC:
|
||||
1. Fix the "nimbus_id=0" field not being displayed when querying RAS error information.
|
||||
2. Check CPUs online, not configured.
|
||||
|
||||
* Wed Nov 29 2023 renhongxun <renhongxun@h-partners.com> - 0.6.7-14
|
||||
- Type:bugfix
|
||||
- ID:NA
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user