Compare commits

..

No commits in common. "ec97ff61b2f48c03eef468b8b08738af6f54f55e" and "c0e47ea9c587ee738ec7dd419e2707c19f6525b7" have entirely different histories.

7 changed files with 81 additions and 433 deletions

View File

@ -0,0 +1,67 @@
From: Lu Weitao <luweitaobe@163.com>
Date: Thu, 12 Aug 2021 15:20:40 +0800
Subject: [PATCH 5/5] fix-multi-define
[Why]
met multiple definitions as below:
[ 117s] /usr/bin/ld: test/alloc_benchmark_tbb-load_tbbmalloc_symbols.o:/home/abuild/rpmbuild/BUILD/memkind-1.7.0/test/tbbmalloc.h:28: multiple definition of `scalable_mallo
c'; test/alloc_benchmark_tbb-alloc_benchmark.o:/home/abuild/rpmbuild/BUILD/memkind-1.7.0/test/tbbmalloc.h:28: first defined here
[ 117s] /usr/bin/ld: test/alloc_benchmark_tbb-load_tbbmalloc_symbols.o:/home/abuild/rpmbuild/BUILD/memkind-1.7.0/test/tbbmalloc.h:29: multiple definition of `scalable_reall
oc'; test/alloc_benchmark_tbb-alloc_benchmark.o:/home/abuild/rpmbuild/BUILD/memkind-1.7.0/test/tbbmalloc.h:29: first defined here
[ 117s] /usr/bin/ld: test/alloc_benchmark_tbb-load_tbbmalloc_symbols.o:/home/abuild/rpmbuild/BUILD/memkind-1.7.0/test/tbbmalloc.h:30: multiple definition of `scalable_callo
c'; test/alloc_benchmark_tbb-alloc_benchmark.o:/home/abuild/rpmbuild/BUILD/memkind-1.7.0/test/tbbmalloc.h:30: first defined here
[ 117s] /usr/bin/ld: test/alloc_benchmark_tbb-load_tbbmalloc_symbols.o:/home/abuild/rpmbuild/BUILD/memkind-1.7.0/test/tbbmalloc.h:31: multiple definition of `scalable_free'
; test/alloc_benchmark_tbb-alloc_benchmark.o:/home/abuild/rpmbuild/BUILD/memkind-1.7.0/test/tbbmalloc.h:31: first defined here
Signed-off-by: Lu Weitao <luweitaobe@163.com>
---
memkind-1.7.0/test/alloc_benchmark.c | 2 ++
memkind-1.7.0/test/load_tbbmalloc_symbols.c | 3 +++
memkind-1.7.0/test/tbbmalloc.h | 8 ++++----
3 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/test/alloc_benchmark.c b/test/alloc_benchmark.c
--- a/test/alloc_benchmark.c
+++ b/test/alloc_benchmark.c
@@ -47,6 +47,8 @@
double ctimer(void);
void usage(char * name);
+void* (*scalable_malloc)(size_t);
+void (*scalable_free)(void*);
int main(int argc, char * argv[])
{
diff --git a/test/load_tbbmalloc_symbols.c b/test/load_tbbmalloc_symbols.c
--- a/test/load_tbbmalloc_symbols.c
+++ b/test/load_tbbmalloc_symbols.c
@@ -24,6 +24,9 @@
#include "tbbmalloc.h"
+void* (*scalable_realloc)(void*, size_t);
+void* (*scalable_calloc)(size_t, size_t);
+
int load_tbbmalloc_symbols()
{
const char so_name[]="libtbbmalloc.so.2";
diff --git a/test/tbbmalloc.h b/test/tbbmalloc.h
--- a/test/tbbmalloc.h
+++ b/test/tbbmalloc.h
@@ -25,9 +25,9 @@
#include <stdio.h>
#include <dlfcn.h>
-void* (*scalable_malloc)(size_t);
-void* (*scalable_realloc)(void*, size_t);
-void* (*scalable_calloc)(size_t, size_t);
-void (*scalable_free)(void*);
+extern void* (*scalable_malloc)(size_t);
+extern void* (*scalable_realloc)(void*, size_t);
+extern void* (*scalable_calloc)(size_t, size_t);
+extern void (*scalable_free)(void*);
int load_tbbmalloc_symbols();
--
2.23.0

View File

@ -1,27 +0,0 @@
From ca912b682c6f20dffabc121974f2451272db5aaa Mon Sep 17 00:00:00 2001
From: yangchenguang <yangchenguang@kylinsec.com.cn>
Date: Tue, 30 May 2023 11:38:58 +0800
Subject: [PATCH] Add loongarch64 and sw_64 support
Signed-off-by: yangchenguang <yangchenguang@kylinsec.com.cn>
---
jemalloc/include/jemalloc/internal/quantum.h | 3 +++
1 file changed, 3 insertions(+)
diff --git a/jemalloc/include/jemalloc/internal/quantum.h b/jemalloc/include/jemalloc/internal/quantum.h
index 821086e..d6600af 100644
--- a/jemalloc/include/jemalloc/internal/quantum.h
+++ b/jemalloc/include/jemalloc/internal/quantum.h
@@ -30,6 +30,9 @@
# ifdef __hppa__
# define LG_QUANTUM 4
# endif
+# ifdef __loongarch__
+# define LG_QUANTUM 4
+# endif
# ifdef __m68k__
# define LG_QUANTUM 3
# endif
--
2.41.0

View File

@ -1,27 +0,0 @@
From 8a6a84e15f9802b9b16e8cf2a77476e5f5cb7f91 Mon Sep 17 00:00:00 2001
From: herengui <herengui@kylinsec.com.cn>
Date: Tue, 29 Aug 2023 10:17:26 +0800
Subject: [PATCH] add sw_64 support not upstream
Signed-off-by: herengui <herengui@kylinsec.com.cn>
---
jemalloc/include/jemalloc/internal/quantum.h | 3 +++
1 file changed, 3 insertions(+)
diff --git a/jemalloc/include/jemalloc/internal/quantum.h b/jemalloc/include/jemalloc/internal/quantum.h
index d6600af..b2e9633 100644
--- a/jemalloc/include/jemalloc/internal/quantum.h
+++ b/jemalloc/include/jemalloc/internal/quantum.h
@@ -58,6 +58,9 @@
defined(__SH4_SINGLE_ONLY__))
# define LG_QUANTUM 4
# endif
+# ifdef __sw_64__
+# define LG_QUANTUM 4
+# endif
# ifdef __tile__
# define LG_QUANTUM 4
# endif
--
2.41.0

View File

@ -1,340 +0,0 @@
From 448eb95b45b0cf6ecc7cf1a3e24056a2fdae85bd Mon Sep 17 00:00:00 2001
From: Yicong Yang <yangyicong@hisilicon.com>
Date: Fri, 13 Oct 2023 15:21:11 +0800
Subject: [PATCH] Support initializing HBW nodes from memory_locality
In the current implementation we mainly infer the HBW nodes from the
HMAT/SLIT, which may not describe all cases. For example,
HMAT/SLIT cannot describe the topology below:
[ Node 0 ]
[ CPU 0-3 ][ CPU 4-7 ]
| |
[ HBM 0 ][ HBM 1 ]
[ Node 1 ][ Node 2 ]
CPU 0-7 are in one NUMA node, but CPU 0-3 is closest to HBM 0 while
CPU 4-7 is closest to HBM 1. Current HMAT/SLIT cannot support this
case.
In order to support this, openEuler has merged an HBM device driver
to export the topology via sysfs[1]. The description of the above topology
will look like:
$ cat /sys/kernel/hbm_memory/memory_topo/memory_locality
1 0-3
1 4-7
This patch cooperates with the HBM device driver to support initializing
the HBW nodes from memory_locality for memkind. It will try to obtain
the HBW nodes by parsing memory_locality first; on failure, or if there
is no memory_locality on the system, it will fall back to HMAT/SLIT. Users
can disable this function with MEMKIND_DISABLE_MEMORY_LOCALITY=1 as well.
[1] https://gitee.com/openeuler/kernel/pulls/451
Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
---
include/memkind/internal/memkind_bitmask.h | 2 +
src/memkind_bitmask.c | 185 +++++++++++++++++++++
src/memkind_hbw.c | 42 +++++
3 files changed, 229 insertions(+)
diff --git a/include/memkind/internal/memkind_bitmask.h b/include/memkind/internal/memkind_bitmask.h
index 5c5b8434..6b0c3f64 100644
--- a/include/memkind/internal/memkind_bitmask.h
+++ b/include/memkind/internal/memkind_bitmask.h
@@ -12,6 +12,8 @@ extern "C" {
typedef int (*get_node_bitmask)(struct bitmask **);
+int set_numanode_from_memory_locality(void **numanode,
+ memkind_node_variant_t node_variant);
int set_closest_numanode(get_node_bitmask get_bitmask, void **numanode,
memkind_node_variant_t node_variant);
int set_bitmask_for_current_numanode(unsigned long *nodemask,
diff --git a/src/memkind_bitmask.c b/src/memkind_bitmask.c
index 4f6d9f00..84300395 100644
--- a/src/memkind_bitmask.c
+++ b/src/memkind_bitmask.c
@@ -1,9 +1,11 @@
// SPDX-License-Identifier: BSD-2-Clause
/* Copyright (C) 2019 - 2021 Intel Corporation. */
+#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdint.h>
+#include <stdio.h>
#include <memkind/internal/memkind_bitmask.h>
#include <memkind/internal/memkind_log.h>
@@ -12,6 +14,89 @@
// Vector of CPUs with memory NUMA Node id(s)
VEC(vec_cpu_node, int);
+void init_node_closet_cpu(cpu_set_t **cpunode_mask, int num_cpu, int num_nodes)
+{
+ char *line = NULL;
+ size_t len = 0;
+ ssize_t n;
+ FILE *f;
+
+ /*
+ * The content of /sys/kernel/hbm_memory/memory_topo/memory_locality should
+ * be like:
+ * 2 0-3
+ * 3 4-7
+ * 4 8-11
+ * 5 12-15
+ * 6 16-19
+ * 7 20-23
+ * 8 24-27
+ * 9 28-31
+ *
+ * The 1st column is the HBW node number and the 2nd column is the CPU list
+ * which is closet to the HBW node.
+ */
+ f = fopen("/sys/kernel/hbm_memory/memory_topo/memory_locality", "r");
+ if (!f)
+ return;
+
+ while ((n = getline(&line, &len, f)) != -1) {
+ long int node, begin_cpu, end_cpu;
+ char *begin, *end;
+
+ /* Get the node number first */
+ node = strtol(line, &end, 0);
+
+ /* Either the node number is invalid or the whole line is invalid */
+ if (line == end || node == LONG_MAX || node == LONG_MIN)
+ break;
+
+ if (node >= num_nodes) {
+ log_err("Invalid node number provided by memory_locality.");
+ break;
+ }
+
+ /* Try to find the beginning of the CPU list string */
+ while (*end == ' ' && end != line + len)
+ end++;
+
+ if (end == line + len || !isdigit(*end))
+ break;
+
+ begin = end;
+ do {
+ begin_cpu = strtol(begin, &end, 0);
+ if (begin == end || begin_cpu == LONG_MAX || begin_cpu == LONG_MIN)
+ break;
+
+ /* End of the line */
+ if (*end == '\0' || *end == '\n') {
+ CPU_SET_S(begin_cpu, CPU_ALLOC_SIZE(num_cpu), cpunode_mask[node]);
+ break;
+ } else if (*end == ',') {
+ CPU_SET_S(begin_cpu, CPU_ALLOC_SIZE(num_cpu), cpunode_mask[node]);
+ } else if (*end == '-' && isdigit(*(++end))) {
+ begin = end;
+ end_cpu = strtol(begin, &end, 0);
+ if (begin == end || end_cpu == LONG_MAX || end_cpu == LONG_MIN)
+ break;
+
+ while (begin_cpu <= end_cpu) {
+ CPU_SET_S(begin_cpu, CPU_ALLOC_SIZE(num_cpu), cpunode_mask[node]);
+ ++begin_cpu;
+ }
+ } else {
+ break;
+ }
+
+ begin = end + 1;
+ } while (begin < line + len);
+ }
+
+ free(line);
+ fclose(f);
+}
+
int memkind_env_get_nodemask(char *nodes_env, struct bitmask **bm)
{
*bm = numa_parse_nodestring(nodes_env);
@@ -22,6 +107,106 @@ int memkind_env_get_nodemask(char *nodes_env, struct bitmask **bm)
return MEMKIND_SUCCESS;
}
+int set_numanode_from_memory_locality(void **numanode,
+ memkind_node_variant_t node_variant)
+{
+ int num_cpu = numa_num_configured_cpus();
+ int cpuset_size = CPU_ALLOC_SIZE(num_cpu);
+ int max_node_id = numa_max_node();
+ cpu_set_t **cpunode_mask;
+ int init_node, cpu_id;
+ int status;
+
+ cpunode_mask = calloc(max_node_id + 1, sizeof(*cpunode_mask));
+ if (!cpunode_mask) {
+ status = MEMKIND_ERROR_MALLOC;
+ log_err("calloc() failed.");
+ goto out;
+ }
+
+ for (init_node = 0; init_node <= max_node_id; init_node++) {
+ cpunode_mask[init_node] = CPU_ALLOC(num_cpu);
+ if (!cpunode_mask[init_node]) {
+ while (init_node >= 0) {
+ CPU_FREE(cpunode_mask[init_node]);
+ init_node--;
+ }
+
+ status = MEMKIND_ERROR_MALLOC;
+ log_err("CPU_ALLOC_SIZE() failed.");
+ goto free_cpunode_mask;
+ }
+
+ CPU_ZERO_S(cpuset_size, cpunode_mask[init_node]);
+ }
+
+ init_node_closet_cpu(cpunode_mask, num_cpu, max_node_id + 1);
+
+ struct vec_cpu_node *node_arr =
+ (struct vec_cpu_node *)calloc(num_cpu, sizeof(struct vec_cpu_node));
+ if (!node_arr) {
+ status = MEMKIND_ERROR_MALLOC;
+ log_err("calloc() failed.");
+ goto free_cpunode_mask_array;
+ }
+
+ /* Scan CPUs once. Assuming the CPU number are much more bigger than NUMA Nodes */
+ for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
+ for (init_node = 0; init_node <= max_node_id; init_node++) {
+ if (CPU_ISSET_S(cpu_id, cpuset_size, cpunode_mask[init_node])) {
+ VEC_PUSH_BACK(&node_arr[cpu_id], init_node);
+
+ /*
+ * A cpu should always have one closet node, log error if
+ * violate this.
+ */
+ if (node_variant == NODE_VARIANT_SINGLE &&
+ VEC_SIZE(&node_arr[cpu_id]) > 1) {
+ log_err("CPU%d has more than one closet node.", cpu_id);
+ status = MEMKIND_ERROR_RUNTIME;
+ for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
+ if (VEC_CAPACITY(&node_arr[cpu_id]))
+ VEC_DELETE(&node_arr[cpu_id]);
+ }
+
+ goto free_node_arr;
+ }
+ }
+ }
+ }
+
+ /* Sanity Check each node_arr */
+ for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
+ if (VEC_SIZE(&node_arr[cpu_id]) == 0) {
+ log_err("CPU%d's nodemask is not initialized.", cpu_id);
+ status = MEMKIND_ERROR_RUNTIME;
+ for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
+ if (VEC_CAPACITY(&node_arr[cpu_id]))
+ VEC_DELETE(&node_arr[cpu_id]);
+ }
+
+ goto free_node_arr;
+ }
+ }
+
+ *numanode = node_arr;
+ status = MEMKIND_SUCCESS;
+ goto free_cpunode_mask_array;
+
+free_node_arr:
+ free(node_arr);
+
+free_cpunode_mask_array:
+ for (init_node = 0; init_node <= max_node_id; init_node++)
+ CPU_FREE(cpunode_mask[init_node]);
+
+free_cpunode_mask:
+ free(cpunode_mask);
+
+out:
+ return status;
+}
+
int set_closest_numanode(get_node_bitmask get_bitmask, void **numanode,
memkind_node_variant_t node_variant)
{
diff --git a/src/memkind_hbw.c b/src/memkind_hbw.c
index 077660ab..e9948593 100644
--- a/src/memkind_hbw.c
+++ b/src/memkind_hbw.c
@@ -363,10 +363,36 @@ static bool is_hmat_supported(void)
return true;
}
+/*
+ * OS may provide further information of HBW topology in
+ * /sys/kernel/hbm_memory/memory_topo/memory_locality. Use it unless user
+ * specified HBW nodes or disabled using of memory_locality.
+ */
+static bool use_memory_locality(void)
+{
+ char *memory_locality_disable = memkind_get_env("MEMKIND_DISABLE_MEMORY_LOCALITY");
+
+ if (memory_locality_disable && !strncmp(memory_locality_disable, "1", 1))
+ return false;
+
+ if (memkind_get_env("MEMKIND_HBW_NODES"))
+ return false;
+
+ return true;
+}
+
static void memkind_hbw_closest_numanode_init(void)
{
struct hbw_numanode_t *g = &memkind_hbw_numanode_g[NODE_VARIANT_MULTIPLE];
g->numanode = NULL;
+
+ if (use_memory_locality()) {
+ g->init_err = set_numanode_from_memory_locality(&g->numanode,
+ NODE_VARIANT_MULTIPLE);
+ if (!g->init_err)
+ return;
+ }
+
if (!is_hmat_supported()) {
g->init_err = set_closest_numanode(memkind_hbw_get_nodemask,
&g->numanode, NODE_VARIANT_MULTIPLE);
@@ -380,6 +406,14 @@ static void memkind_hbw_closest_preferred_numanode_init(void)
{
struct hbw_numanode_t *g = &memkind_hbw_numanode_g[NODE_VARIANT_SINGLE];
g->numanode = NULL;
+
+ if (use_memory_locality()) {
+ g->init_err = set_numanode_from_memory_locality(&g->numanode,
+ NODE_VARIANT_SINGLE);
+ if (!g->init_err)
+ return;
+ }
+
if (!is_hmat_supported()) {
g->init_err = set_closest_numanode(memkind_hbw_get_nodemask,
&g->numanode, NODE_VARIANT_SINGLE);
@@ -393,6 +427,14 @@ static void memkind_hbw_all_numanode_init(void)
{
struct hbw_numanode_t *g = &memkind_hbw_numanode_g[NODE_VARIANT_ALL];
g->numanode = NULL;
+
+ if (use_memory_locality()) {
+ g->init_err = set_numanode_from_memory_locality(&g->numanode,
+ NODE_VARIANT_ALL);
+ if (!g->init_err)
+ return;
+ }
+
if (!is_hmat_supported()) {
g->init_err = set_closest_numanode(memkind_hbw_get_nodemask,
&g->numanode, NODE_VARIANT_ALL);
--
2.24.0

Binary file not shown.

BIN
memkind-1.7.0.tar.gz Normal file

Binary file not shown.

View File

@ -1,21 +1,18 @@
Name: memkind
Summary: Extensible Heap Manager for User
Version: 1.13.0
Release: 4
Version: 1.7.0
Release: 5
License: BSD
URL: http://memkind.github.io/memkind
Source0: https://github.com/memkind/memkind/archive/v1.13.0/%{name}-%{version}.tar.gz
Source0: https://github.com/memkind/memkind/archive/v1.7.0/%{name}-%{version}.tar.gz
Patch0001: 0001-fix-multi-define.patch
Patch1000: 1000-add-loongarch-support-upstream.patch
Patch1001: 1001-add-sw_64-support-not-upstream.patch
Patch9000: 9000-Support-initializing-HBW-nodes-from-memory_locality.patch
BuildRequires: automake libtool numactl-devel systemd gcc gcc-c++ hwloc-devel
ExclusiveArch: x86_64 aarch64 loongarch64 sw_64
BuildRequires: automake libtool numactl-devel systemd gcc gcc-c++
ExclusiveArch: x86_64
%description
The kinds of memory are defined by operating system memory policies that have been applied
to virtual address ranges. Memory characteristics supported by memkind without user extension
to virtual address ranges.Memory characteristics supported by memkind without user extension
include control of NUMA and page size features.
%package devel
@ -35,19 +32,15 @@ Help documents for memkind.
%autosetup -a 0 -n %{name}-%{version} -p1
%build
%ifarch loongarch64 sw_64
%_update_config_guess
%_update_config_sub
%endif
pushd %{_builddir}/memkind-1.13.0
echo 1.13.0 > %{_builddir}/memkind-1.13.0/VERSION
pushd %{_builddir}/memkind-1.7.0
echo 1.7.0 > %{_builddir}/memkind-1.7.0/VERSION
./build.sh --prefix=%{_prefix} --includedir=%{_includedir} --libdir=%{_libdir} \
--bindir=%{_bindir} --docdir=%{_docdir}/memkind --mandir=%{_mandir} \
--sbindir=%{_sbindir}
popd
%install
pushd %{_builddir}/memkind-1.13.0
pushd %{_builddir}/memkind-1.7.0
%make_install
popd
@ -59,48 +52,30 @@ popd
%files
%{_libdir}/libmemkind.so.*
%{_libdir}/libmemtier.so.*
%{_libdir}/libautohbw.so.*
%{_bindir}/memkind-hbw-nodes
%{_bindir}/memkind-auto-dax-kmem-nodes
%dir %{_docdir}/memkind
%doc %{_docdir}/memkind/COPYING
%files devel
%{_includedir}/fixed_allocator.h
%dir %{_includedir}/memkind
%dir %{_includedir}/memkind/internal/
%{_includedir}/memkind/internal/*.h
%{_includedir}/memkind*.h
%{_includedir}/hbwmalloc.h
%{_includedir}/hbw_allocator.h
%{_includedir}/pmem_allocator.h
%{_libdir}/libmemkind.so
%{_libdir}/libmemtier.so
%{_libdir}/libautohbw.so
%exclude %{_libdir}/libmemkind.{l,}a
%exclude %{_libdir}/libmemtier.{l,}a
%exclude %{_libdir}/libautohbw.{l,}a
%exclude %{_libdir}/pkgconfig/memkind.pc
%exclude %{_docdir}/memkind/VERSION
%files help
%doc %{_docdir}/memkind/README
%{_mandir}/man1/*
%{_mandir}/man3/*
%{_mandir}/man7/*
%changelog
* Mon Jul 1 2024 Jie Liu <liujie375@h-partners.com> - 1.13.0-4
- Support initializing HBW nodes from HMAT with library hwloc-devel
* Wed Oct 18 2023 Yicong Yang <yangyicong@hisilicon.com> - 1.13.0-3
- Support initializing HBW nodes from memory_locality on openEuler
* Tue Aug 29 2023 herengui <herengui@kylinsec.com.cn> - 1.13.0-2
- add support for loongarch64 and sw_64
* Mon Jun 6 2022 Hesham Almatary <hesham.almatary@huawei.com> - 1.13.0-1
- Upgrade to version 1.13.0 and allow building for aarch64
* Mon Aug 2 2021 luweitao <luweitaobe@163.com> - 2.7.1-12
* Wed Aug 2 2021 luweitao <luweitaobe@163.com> - 2.7.1-12
- fix compile failure by GCC-10
* Thu Apr 23 2020 leiju <leiju4@huawei.com> - 1.7.0-4