!161 revert "rasdaemon: Add HBM Memory ACLS support for HiSilicon"
From: @hejunhao3 Reviewed-by: @hunan4222, @dillon_chen Signed-off-by: @dillon_chen
This commit is contained in:
commit
df360dca76
274
0001-revert-rasdaemon-Add-HBM-Memory-ACLS-support-for-HiS.patch
Normal file
274
0001-revert-rasdaemon-Add-HBM-Memory-ACLS-support-for-HiS.patch
Normal file
@ -0,0 +1,274 @@
|
|||||||
|
From 77c2ccb26b5da0c24a82ae956164fe527723dabd Mon Sep 17 00:00:00 2001
|
||||||
|
From: Junhao He <hejunhao3@huawei.com>
|
||||||
|
Date: Mon, 2 Sep 2024 10:24:59 +0800
|
||||||
|
Subject: [PATCH] revert "rasdaemon: Add HBM Memory ACLS support for HiSilicon"
|
||||||
|
|
||||||
|
The HBM ACLS scheme that rasdaemon relies on has changed. Moreover, the
|
||||||
|
new solution only needs to be applied to the openEuler-22.03-LTS-SP4
|
||||||
|
branch, so this branch drops support for HiSilicon HBM Memory ACLS.
|
||||||
|
|
||||||
|
Signed-off-by: Junhao He <hejunhao3@huawei.com>
|
||||||
|
---
|
||||||
|
configure.ac | 11 ----
|
||||||
|
misc/rasdaemon.env | 5 --
|
||||||
|
non-standard-hisilicon.c | 110 -------------------------------------
|
||||||
|
ras-events.c | 3 -
|
||||||
|
ras-non-standard-handler.c | 32 -----------
|
||||||
|
ras-non-standard-handler.h | 8 ---
|
||||||
|
6 files changed, 169 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/configure.ac b/configure.ac
|
||||||
|
index 30c90d2..d098fcf 100644
|
||||||
|
--- a/configure.ac
|
||||||
|
+++ b/configure.ac
|
||||||
|
@@ -171,16 +171,6 @@ AS_IF([test "x$enable_cpu_fault_isolation" = "xyes" || test "x$enable_all" == "x
|
||||||
|
AM_CONDITIONAL([WITH_CPU_FAULT_ISOLATION], [test x$enable_cpu_fault_isolation = xyes || test x$enable_all == xyes])
|
||||||
|
AM_COND_IF([WITH_CPU_FAULT_ISOLATION], [USE_CPU_FAULT_ISOLATION="yes"], [USE_CPU_FAULT_ISOLATION="no"])
|
||||||
|
|
||||||
|
-AC_ARG_ENABLE([hisi_hbm_memory_acls],
|
||||||
|
- AS_HELP_STRING([--enable-hisi-hbm-memory-acls], [enable HiSilicon HBM Memory ACLS]))
|
||||||
|
-
|
||||||
|
-AS_IF([test "x$enable_hisi_hbm_memory_acls" = "xyes" || test "x$enable_all" == "xyes"], [
|
||||||
|
- AC_DEFINE(HAVE_HISI_HBM_MEMORY_ACLS,1,"have HiSilicon HBM Memory ACLS")
|
||||||
|
- AC_SUBST([WITH_HISI_HBM_MEMORY_ACLS])
|
||||||
|
-])
|
||||||
|
-AM_CONDITIONAL([WITH_HISI_HBM_MEMORY_ACLS], [test x$enable_hisi_hbm_memory_acls = xyes || test x$enable_all == xyes])
|
||||||
|
-AM_COND_IF([WITH_HISI_HBM_MEMORY_ACLS], [USE_HISI_HBM_MEMORY_ACLS="yes"], [USE_HISI_HBM_MEMORY_ACLS="no"])
|
||||||
|
-
|
||||||
|
test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc
|
||||||
|
|
||||||
|
CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -Wstrict-prototypes"
|
||||||
|
@@ -222,5 +212,4 @@ compile time options summary
|
||||||
|
Memory CE PFA : $USE_MEMORY_CE_PFA
|
||||||
|
AMP RAS errors : $USE_AMP_NS_DECODE
|
||||||
|
CPU fault isolation : $USE_CPU_FAULT_ISOLATION
|
||||||
|
- HISI HBM Memory ACLS: $USE_HISI_HBM_MEMORY_ACLS
|
||||||
|
EOF
|
||||||
|
diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env
|
||||||
|
index d754128..ca12a1a 100644
|
||||||
|
--- a/misc/rasdaemon.env
|
||||||
|
+++ b/misc/rasdaemon.env
|
||||||
|
@@ -45,10 +45,5 @@ CPU_ISOLATION_CYCLE="24h"
|
||||||
|
# Prevent excessive isolation from causing an avalanche effect
|
||||||
|
CPU_ISOLATION_LIMIT="10"
|
||||||
|
|
||||||
|
-# Support the HBM Memory ACLS (Adaptive Cache Line Sparing) on HiSilicon platform (yes|no).
|
||||||
|
-HISI_HBM_MEMORY_ACLS="no"
|
||||||
|
-# Specify rasdaemon to isolation the error page which fails to be repaired by HiSilicon HBM ACLS (yes|no).
|
||||||
|
-HISI_HBM_ISOLATION_PAGE="no"
|
||||||
|
-
|
||||||
|
# Disable specified events by config
|
||||||
|
DISABLE="block:block_rq_complete"
|
||||||
|
\ No newline at end of file
|
||||||
|
diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c
|
||||||
|
index 25c4903..7296d28 100644
|
||||||
|
--- a/non-standard-hisilicon.c
|
||||||
|
+++ b/non-standard-hisilicon.c
|
||||||
|
@@ -19,17 +19,6 @@
|
||||||
|
#define HISI_BUF_LEN 2048
|
||||||
|
#define HISI_PCIE_INFO_BUF_LEN 256
|
||||||
|
|
||||||
|
-#ifdef HAVE_HISI_HBM_MEMORY_ACLS
|
||||||
|
-# define HISI_TYPE_UINT32_WIDTH 32
|
||||||
|
-/* Specify the Hisilicon HBMC HBM error type */
|
||||||
|
-# define HISI_HBM_ERR_TYPE 0
|
||||||
|
-# define HISI_HBM_ERR_ACLS BIT(0)
|
||||||
|
-# define HISI_HBM_ACLS_ADDL 1
|
||||||
|
-# define HISI_HBM_ACLS_ADDH 2
|
||||||
|
-# define HISI_HBM_ACLS_ARRAY_SIZE 12
|
||||||
|
-# define HISI_SUBMOD_HBMC_HBM 6
|
||||||
|
-#endif
|
||||||
|
-
|
||||||
|
struct hisi_common_error_section {
|
||||||
|
uint32_t val_bits;
|
||||||
|
uint8_t version;
|
||||||
|
@@ -369,100 +358,6 @@ static int add_hisi_common_table(struct ras_events *ras,
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-#ifdef HAVE_HISI_HBM_MEMORY_ACLS
|
||||||
|
-#include <errno.h>
|
||||||
|
-#include <unistd.h>
|
||||||
|
-
|
||||||
|
-static int write_file(const char *name, unsigned long long value)
|
||||||
|
-{
|
||||||
|
- char fname[MAX_PATH + 1] = "/sys/kernel/";
|
||||||
|
- FILE *file;
|
||||||
|
- int ret;
|
||||||
|
-
|
||||||
|
- strcat(fname, name);
|
||||||
|
- if (access(fname, W_OK)) {
|
||||||
|
- log(TERM, LOG_WARNING, "HiSilicon HBM ACLS: Cannot access '%s': %s\n",
|
||||||
|
- fname, strerror(errno));
|
||||||
|
- return -errno;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- file = fopen(fname, "w");
|
||||||
|
- if (!file) {
|
||||||
|
- log(TERM, LOG_WARNING, "HiSilicon HBM ACLS: Cannot to open '%s': %s\n",
|
||||||
|
- fname, strerror(errno));
|
||||||
|
- return -errno;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- ret = fprintf(file, "0x%llx\n", value);
|
||||||
|
- if (ret < 0)
|
||||||
|
- log(TERM, LOG_WARNING, "HiSilicon HBM ACLS: Failed to set %s (0x%llx): %s\n",
|
||||||
|
- fname, value, strerror(errno));
|
||||||
|
-
|
||||||
|
- fclose(file);
|
||||||
|
- return ret;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int hisi_hbmc_hbm_acls(const struct hisi_common_error_section *err,
|
||||||
|
- int page_size)
|
||||||
|
-{
|
||||||
|
- unsigned long long paddr;
|
||||||
|
- unsigned long long pfn;
|
||||||
|
- int ret;
|
||||||
|
-
|
||||||
|
- if (err->reg_array_size < HISI_HBM_ACLS_ARRAY_SIZE) {
|
||||||
|
- log(TERM, LOG_WARNING, "HiSilicon HBM ACLS: No valid address array length (%d)\n",
|
||||||
|
- err->reg_array_size);
|
||||||
|
- return -1;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- if (!page_size)
|
||||||
|
- return -1;
|
||||||
|
-
|
||||||
|
- paddr = err->reg_array[HISI_HBM_ACLS_ADDH];
|
||||||
|
- paddr <<= HISI_TYPE_UINT32_WIDTH;
|
||||||
|
- paddr += err->reg_array[HISI_HBM_ACLS_ADDL];
|
||||||
|
- pfn = paddr / page_size;
|
||||||
|
-
|
||||||
|
- ret = write_file("hbm_memory/acls/acls_query", paddr);
|
||||||
|
- if (ret < 0)
|
||||||
|
- return ret;
|
||||||
|
-
|
||||||
|
- ret = write_file("debug/hwpoison/corrupt-pfn", pfn);
|
||||||
|
- if (ret < 0)
|
||||||
|
- return ret;
|
||||||
|
-
|
||||||
|
- ret = write_file("hbm_memory/acls/acls_repair", paddr);
|
||||||
|
- if (ret < 0 && ras_ns_hisi_hbm_isolation_page_enabled()) {
|
||||||
|
- log(TERM, LOG_WARNING, "HiSilicon HBM ACLS: Keep the pfn (0x%llx) offline\n",
|
||||||
|
- pfn);
|
||||||
|
- return ret;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- ret = write_file("debug/hwpoison/unpoison-pfn", pfn);
|
||||||
|
- if (ret < 0)
|
||||||
|
- return ret;
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static void hisi_hbm_acls_handler(const struct hisi_common_error_section *err,
|
||||||
|
- int page_size)
|
||||||
|
-{
|
||||||
|
- if (strcmp(module_name[err->module_id], "HBMC") ||
|
||||||
|
- err->submodule_id != HISI_SUBMOD_HBMC_HBM)
|
||||||
|
- return;
|
||||||
|
-
|
||||||
|
- if (!(err->val_bits & BIT(HISI_COMMON_VALID_REG_ARRAY_SIZE)))
|
||||||
|
- return;
|
||||||
|
-
|
||||||
|
- if (!(err->reg_array[HISI_HBM_ERR_TYPE] & HISI_HBM_ERR_ACLS))
|
||||||
|
- return;
|
||||||
|
-
|
||||||
|
- if (hisi_hbmc_hbm_acls(err, page_size))
|
||||||
|
- log(TERM, LOG_WARNING, "Failed to handler HiSilicon HBM ACLS\n");
|
||||||
|
-}
|
||||||
|
-#endif
|
||||||
|
-
|
||||||
|
static int decode_hisi_common_section(struct ras_events *ras,
|
||||||
|
struct ras_ns_ev_decoder *ev_decoder,
|
||||||
|
struct trace_seq *s,
|
||||||
|
@@ -498,11 +393,6 @@ static int decode_hisi_common_section(struct ras_events *ras,
|
||||||
|
step_vendor_data_tab(ev_decoder, "hisi_common_section_tab");
|
||||||
|
}
|
||||||
|
|
||||||
|
-#ifdef HAVE_HISI_HBM_MEMORY_ACLS
|
||||||
|
- if (ras_ns_hisi_hbm_acls_enabled())
|
||||||
|
- hisi_hbm_acls_handler(err, ras->page_size);
|
||||||
|
-#endif
|
||||||
|
-
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/ras-events.c b/ras-events.c
|
||||||
|
index d2a7a4e..ed2198b 100644
|
||||||
|
--- a/ras-events.c
|
||||||
|
+++ b/ras-events.c
|
||||||
|
@@ -951,9 +951,6 @@ int handle_ras_events(int record_events)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAVE_NON_STANDARD
|
||||||
|
-#ifdef HAVE_HISI_HBM_MEMORY_ACLS
|
||||||
|
- ras_ns_hisi_hbm_param_init();
|
||||||
|
-#endif
|
||||||
|
if (is_disabled_event("ras", "non_standard_event")) {
|
||||||
|
log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n",
|
||||||
|
"ras", "non_standard_event");
|
||||||
|
diff --git a/ras-non-standard-handler.c b/ras-non-standard-handler.c
|
||||||
|
index 3ed0900..20d514b 100644
|
||||||
|
--- a/ras-non-standard-handler.c
|
||||||
|
+++ b/ras-non-standard-handler.c
|
||||||
|
@@ -24,38 +24,6 @@
|
||||||
|
|
||||||
|
static struct ras_ns_ev_decoder *ras_ns_ev_dec_list;
|
||||||
|
|
||||||
|
-#ifdef HAVE_HISI_HBM_MEMORY_ACLS
|
||||||
|
-static bool ras_ns_hisi_hbm_acls;
|
||||||
|
-static bool ras_ns_hisi_hbm_isolation_page;
|
||||||
|
-
|
||||||
|
-void ras_ns_hisi_hbm_param_init(void)
|
||||||
|
-{
|
||||||
|
- char *env;
|
||||||
|
-
|
||||||
|
- env = getenv("HISI_HBM_MEMORY_ACLS");
|
||||||
|
- if (env && strcasecmp(env, "yes") == 0) {
|
||||||
|
- log(TERM, LOG_INFO, "HiSilicon HBM Memory ACLS is enabled\n");
|
||||||
|
- ras_ns_hisi_hbm_acls = true;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- env = getenv("HISI_HBM_ISOLATION_PAGE");
|
||||||
|
- if (env && strcasecmp(env, "yes") == 0) {
|
||||||
|
- log(TERM, LOG_INFO, "HiSilicon HBM ACLS page isolation is enabled\n");
|
||||||
|
- ras_ns_hisi_hbm_isolation_page = true;
|
||||||
|
- }
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-bool ras_ns_hisi_hbm_acls_enabled(void)
|
||||||
|
-{
|
||||||
|
- return ras_ns_hisi_hbm_acls;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-bool ras_ns_hisi_hbm_isolation_page_enabled(void)
|
||||||
|
-{
|
||||||
|
- return ras_ns_hisi_hbm_isolation_page;
|
||||||
|
-}
|
||||||
|
-#endif
|
||||||
|
-
|
||||||
|
void print_le_hex(struct trace_seq *s, const uint8_t *buf, int index) {
|
||||||
|
trace_seq_printf(s, "%02x%02x%02x%02x", buf[index+3], buf[index+2], buf[index+1], buf[index]);
|
||||||
|
}
|
||||||
|
diff --git a/ras-non-standard-handler.h b/ras-non-standard-handler.h
|
||||||
|
index 1c2a6e7..341206a 100644
|
||||||
|
--- a/ras-non-standard-handler.h
|
||||||
|
+++ b/ras-non-standard-handler.h
|
||||||
|
@@ -46,12 +46,4 @@ void ras_ns_finalize_vendor_tables(void);
|
||||||
|
static inline int register_ns_ev_decoder(struct ras_ns_ev_decoder *ns_ev_decoder) { return 0; };
|
||||||
|
#endif
|
||||||
|
|
||||||
|
-#ifdef HAVE_HISI_HBM_MEMORY_ACLS
|
||||||
|
-#include <stdbool.h>
|
||||||
|
-
|
||||||
|
-void ras_ns_hisi_hbm_param_init(void);
|
||||||
|
-bool ras_ns_hisi_hbm_acls_enabled(void);
|
||||||
|
-bool ras_ns_hisi_hbm_isolation_page_enabled(void);
|
||||||
|
-#endif
|
||||||
|
-
|
||||||
|
#endif
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
@ -1,6 +1,6 @@
|
|||||||
Name: rasdaemon
|
Name: rasdaemon
|
||||||
Version: 0.6.7
|
Version: 0.6.7
|
||||||
Release: 20
|
Release: 21
|
||||||
License: GPLv2
|
License: GPLv2
|
||||||
Summary: Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events
|
Summary: Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events
|
||||||
URL: https://github.com/mchehab/rasdaemon.git
|
URL: https://github.com/mchehab/rasdaemon.git
|
||||||
@ -60,6 +60,7 @@ Patch9008: 0001-rasdaemon-Add-HBM-Memory-ACLS-support-for-HiSilicon.patch
|
|||||||
Patch9009: add-dynamic-switch-of-ras-events-support-and-disable-block-rq-complete.patch
|
Patch9009: add-dynamic-switch-of-ras-events-support-and-disable-block-rq-complete.patch
|
||||||
Patch9010: fix-rasdaemon-print-loading-config-logs-multiple-times.patch
|
Patch9010: fix-rasdaemon-print-loading-config-logs-multiple-times.patch
|
||||||
Patch9011: bugfix-fix-cpu-isolate-errors-when-some-cpus-are-.patch
|
Patch9011: bugfix-fix-cpu-isolate-errors-when-some-cpus-are-.patch
|
||||||
|
Patch9012: 0001-revert-rasdaemon-Add-HBM-Memory-ACLS-support-for-HiS.patch
|
||||||
|
|
||||||
|
|
||||||
%description
|
%description
|
||||||
@ -79,7 +80,7 @@ autoheader
|
|||||||
libtoolize --automake --copy --debug --force
|
libtoolize --automake --copy --debug --force
|
||||||
automake --add-missing
|
automake --add-missing
|
||||||
%ifarch %{arm} aarch64
|
%ifarch %{arm} aarch64
|
||||||
%configure --enable-mce --enable-aer --enable-sqlite3 --enable-extlog --enable-abrt-report --enable-devlink --enable-diskerror --enable-non-standard --enable-hisi-ns-decode --enable-arm --enable-memory-failure --enable-memory-ce-pfa --enable-cpu-fault-isolation --enable-hisi-hbm-memory-acls
|
%configure --enable-mce --enable-aer --enable-sqlite3 --enable-extlog --enable-abrt-report --enable-devlink --enable-diskerror --enable-non-standard --enable-hisi-ns-decode --enable-arm --enable-memory-failure --enable-memory-ce-pfa --enable-cpu-fault-isolation
|
||||||
%else
|
%else
|
||||||
%configure --enable-mce --enable-aer --enable-sqlite3 --enable-extlog --enable-abrt-report --enable-devlink --enable-diskerror
|
%configure --enable-mce --enable-aer --enable-sqlite3 --enable-extlog --enable-abrt-report --enable-devlink --enable-diskerror
|
||||||
%endif
|
%endif
|
||||||
@ -114,6 +115,12 @@ if [ $1 -eq 0 ] ; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Mon Sep 2 2024 Junhao He <hejunhao3@huawei.com> - 0.6.7-21
|
||||||
|
- Type:bugfix
|
||||||
|
- ID:NA
|
||||||
|
- SUG:NA
|
||||||
|
- DESC:Remove the support for HiSilicon HBM Memory ACLS.
|
||||||
|
|
||||||
* Thu Apr 25 2024 yangjunshuo <yangjunshuo@huawei.com> - 0.6.7-20
|
* Thu Apr 25 2024 yangjunshuo <yangjunshuo@huawei.com> - 0.6.7-20
|
||||||
- Type:bugfix
|
- Type:bugfix
|
||||||
- ID:NA
|
- ID:NA
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user