From 77c2ccb26b5da0c24a82ae956164fe527723dabd Mon Sep 17 00:00:00 2001 From: Junhao He Date: Mon, 2 Sep 2024 10:24:59 +0800 Subject: [PATCH] Revert "rasdaemon: Add HBM Memory ACLS support for HiSilicon" The HBM ACLS scheme that rasdaemon relies on has changed, and the new solution only needs to be merged into the openEuler-22.03-LTS-SP4 branch. Therefore, remove support for HiSilicon HBM Memory ACLS from this branch. Signed-off-by: Junhao He --- configure.ac | 11 ---- misc/rasdaemon.env | 5 -- non-standard-hisilicon.c | 110 ------------------------------------- ras-events.c | 3 - ras-non-standard-handler.c | 32 ----------- ras-non-standard-handler.h | 8 --- 6 files changed, 169 deletions(-) diff --git a/configure.ac b/configure.ac index 30c90d2..d098fcf 100644 --- a/configure.ac +++ b/configure.ac @@ -171,16 +171,6 @@ AS_IF([test "x$enable_cpu_fault_isolation" = "xyes" || test "x$enable_all" == "x AM_CONDITIONAL([WITH_CPU_FAULT_ISOLATION], [test x$enable_cpu_fault_isolation = xyes || test x$enable_all == xyes]) AM_COND_IF([WITH_CPU_FAULT_ISOLATION], [USE_CPU_FAULT_ISOLATION="yes"], [USE_CPU_FAULT_ISOLATION="no"]) -AC_ARG_ENABLE([hisi_hbm_memory_acls], - AS_HELP_STRING([--enable-hisi-hbm-memory-acls], [enable HiSilicon HBM Memory ACLS])) - -AS_IF([test "x$enable_hisi_hbm_memory_acls" = "xyes" || test "x$enable_all" == "xyes"], [ - AC_DEFINE(HAVE_HISI_HBM_MEMORY_ACLS,1,"have HiSilicon HBM Memory ACLS") - AC_SUBST([WITH_HISI_HBM_MEMORY_ACLS]) -]) -AM_CONDITIONAL([WITH_HISI_HBM_MEMORY_ACLS], [test x$enable_hisi_hbm_memory_acls = xyes || test x$enable_all == xyes]) -AM_COND_IF([WITH_HISI_HBM_MEMORY_ACLS], [USE_HISI_HBM_MEMORY_ACLS="yes"], [USE_HISI_HBM_MEMORY_ACLS="no"]) - test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -Wstrict-prototypes" @@ -222,5 +212,4 @@ compile time options summary Memory CE PFA : $USE_MEMORY_CE_PFA AMP RAS errors : $USE_AMP_NS_DECODE CPU fault isolation : $USE_CPU_FAULT_ISOLATION - HISI HBM Memory 
ACLS: $USE_HISI_HBM_MEMORY_ACLS EOF diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env index d754128..ca12a1a 100644 --- a/misc/rasdaemon.env +++ b/misc/rasdaemon.env @@ -45,10 +45,5 @@ CPU_ISOLATION_CYCLE="24h" # Prevent excessive isolation from causing an avalanche effect CPU_ISOLATION_LIMIT="10" -# Support the HBM Memory ACLS (Adaptive Cache Line Sparing) on HiSilicon platform (yes|no). -HISI_HBM_MEMORY_ACLS="no" -# Specify rasdaemon to isolation the error page which fails to be repaired by HiSilicon HBM ACLS (yes|no). -HISI_HBM_ISOLATION_PAGE="no" - # Disable specified events by config DISABLE="block:block_rq_complete" \ No newline at end of file diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c index 25c4903..7296d28 100644 --- a/non-standard-hisilicon.c +++ b/non-standard-hisilicon.c @@ -19,17 +19,6 @@ #define HISI_BUF_LEN 2048 #define HISI_PCIE_INFO_BUF_LEN 256 -#ifdef HAVE_HISI_HBM_MEMORY_ACLS -# define HISI_TYPE_UINT32_WIDTH 32 -/* Specify the Hisilicon HBMC HBM error type */ -# define HISI_HBM_ERR_TYPE 0 -# define HISI_HBM_ERR_ACLS BIT(0) -# define HISI_HBM_ACLS_ADDL 1 -# define HISI_HBM_ACLS_ADDH 2 -# define HISI_HBM_ACLS_ARRAY_SIZE 12 -# define HISI_SUBMOD_HBMC_HBM 6 -#endif - struct hisi_common_error_section { uint32_t val_bits; uint8_t version; @@ -369,100 +358,6 @@ static int add_hisi_common_table(struct ras_events *ras, return 0; } -#ifdef HAVE_HISI_HBM_MEMORY_ACLS -#include -#include - -static int write_file(const char *name, unsigned long long value) -{ - char fname[MAX_PATH + 1] = "/sys/kernel/"; - FILE *file; - int ret; - - strcat(fname, name); - if (access(fname, W_OK)) { - log(TERM, LOG_WARNING, "HiSilicon HBM ACLS: Cannot access '%s': %s\n", - fname, strerror(errno)); - return -errno; - } - - file = fopen(fname, "w"); - if (!file) { - log(TERM, LOG_WARNING, "HiSilicon HBM ACLS: Cannot to open '%s': %s\n", - fname, strerror(errno)); - return -errno; - } - - ret = fprintf(file, "0x%llx\n", value); - if (ret < 0) - log(TERM, 
LOG_WARNING, "HiSilicon HBM ACLS: Failed to set %s (0x%llx): %s\n", - fname, value, strerror(errno)); - - fclose(file); - return ret; -} - -static int hisi_hbmc_hbm_acls(const struct hisi_common_error_section *err, - int page_size) -{ - unsigned long long paddr; - unsigned long long pfn; - int ret; - - if (err->reg_array_size < HISI_HBM_ACLS_ARRAY_SIZE) { - log(TERM, LOG_WARNING, "HiSilicon HBM ACLS: No valid address array length (%d)\n", - err->reg_array_size); - return -1; - } - - if (!page_size) - return -1; - - paddr = err->reg_array[HISI_HBM_ACLS_ADDH]; - paddr <<= HISI_TYPE_UINT32_WIDTH; - paddr += err->reg_array[HISI_HBM_ACLS_ADDL]; - pfn = paddr / page_size; - - ret = write_file("hbm_memory/acls/acls_query", paddr); - if (ret < 0) - return ret; - - ret = write_file("debug/hwpoison/corrupt-pfn", pfn); - if (ret < 0) - return ret; - - ret = write_file("hbm_memory/acls/acls_repair", paddr); - if (ret < 0 && ras_ns_hisi_hbm_isolation_page_enabled()) { - log(TERM, LOG_WARNING, "HiSilicon HBM ACLS: Keep the pfn (0x%llx) offline\n", - pfn); - return ret; - } - - ret = write_file("debug/hwpoison/unpoison-pfn", pfn); - if (ret < 0) - return ret; - - return 0; -} - -static void hisi_hbm_acls_handler(const struct hisi_common_error_section *err, - int page_size) -{ - if (strcmp(module_name[err->module_id], "HBMC") || - err->submodule_id != HISI_SUBMOD_HBMC_HBM) - return; - - if (!(err->val_bits & BIT(HISI_COMMON_VALID_REG_ARRAY_SIZE))) - return; - - if (!(err->reg_array[HISI_HBM_ERR_TYPE] & HISI_HBM_ERR_ACLS)) - return; - - if (hisi_hbmc_hbm_acls(err, page_size)) - log(TERM, LOG_WARNING, "Failed to handler HiSilicon HBM ACLS\n"); -} -#endif - static int decode_hisi_common_section(struct ras_events *ras, struct ras_ns_ev_decoder *ev_decoder, struct trace_seq *s, @@ -498,11 +393,6 @@ static int decode_hisi_common_section(struct ras_events *ras, step_vendor_data_tab(ev_decoder, "hisi_common_section_tab"); } -#ifdef HAVE_HISI_HBM_MEMORY_ACLS - if 
(ras_ns_hisi_hbm_acls_enabled()) - hisi_hbm_acls_handler(err, ras->page_size); -#endif - return 0; } diff --git a/ras-events.c b/ras-events.c index d2a7a4e..ed2198b 100644 --- a/ras-events.c +++ b/ras-events.c @@ -951,9 +951,6 @@ int handle_ras_events(int record_events) #endif #ifdef HAVE_NON_STANDARD -#ifdef HAVE_HISI_HBM_MEMORY_ACLS - ras_ns_hisi_hbm_param_init(); -#endif if (is_disabled_event("ras", "non_standard_event")) { log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", "ras", "non_standard_event"); diff --git a/ras-non-standard-handler.c b/ras-non-standard-handler.c index 3ed0900..20d514b 100644 --- a/ras-non-standard-handler.c +++ b/ras-non-standard-handler.c @@ -24,38 +24,6 @@ static struct ras_ns_ev_decoder *ras_ns_ev_dec_list; -#ifdef HAVE_HISI_HBM_MEMORY_ACLS -static bool ras_ns_hisi_hbm_acls; -static bool ras_ns_hisi_hbm_isolation_page; - -void ras_ns_hisi_hbm_param_init(void) -{ - char *env; - - env = getenv("HISI_HBM_MEMORY_ACLS"); - if (env && strcasecmp(env, "yes") == 0) { - log(TERM, LOG_INFO, "HiSilicon HBM Memory ACLS is enabled\n"); - ras_ns_hisi_hbm_acls = true; - } - - env = getenv("HISI_HBM_ISOLATION_PAGE"); - if (env && strcasecmp(env, "yes") == 0) { - log(TERM, LOG_INFO, "HiSilicon HBM ACLS page isolation is enabled\n"); - ras_ns_hisi_hbm_isolation_page = true; - } -} - -bool ras_ns_hisi_hbm_acls_enabled(void) -{ - return ras_ns_hisi_hbm_acls; -} - -bool ras_ns_hisi_hbm_isolation_page_enabled(void) -{ - return ras_ns_hisi_hbm_isolation_page; -} -#endif - void print_le_hex(struct trace_seq *s, const uint8_t *buf, int index) { trace_seq_printf(s, "%02x%02x%02x%02x", buf[index+3], buf[index+2], buf[index+1], buf[index]); } diff --git a/ras-non-standard-handler.h b/ras-non-standard-handler.h index 1c2a6e7..341206a 100644 --- a/ras-non-standard-handler.h +++ b/ras-non-standard-handler.h @@ -46,12 +46,4 @@ void ras_ns_finalize_vendor_tables(void); static inline int register_ns_ev_decoder(struct ras_ns_ev_decoder *ns_ev_decoder) { return 
0; }; #endif -#ifdef HAVE_HISI_HBM_MEMORY_ACLS -#include - -void ras_ns_hisi_hbm_param_init(void); -bool ras_ns_hisi_hbm_acls_enabled(void); -bool ras_ns_hisi_hbm_isolation_page_enabled(void); -#endif - #endif -- 2.33.0