memory: return NUMA layout of allocated memory during initialization

This will be used by the memory prefaulter to guide the creation of prefault threads, one per NUMA node. A small function to merge layouts from different shards is added in anticipation of its use.
rishvin · Jul 6, 2023 · c2baf51 · c2baf51
1 parent eee093a
commit c2baf51
Show file tree

Hide file tree

Showing 2 changed files with 35 additions and 4 deletions.
diff --git a/include/seastar/core/memory.hh b/include/seastar/core/memory.hh
@@ -141,7 +141,24 @@ static constexpr size_t huge_page_size =
 #error "Huge page size is not defined for this architecture"
 #endif
 
-void configure(std::vector<resource::memory> m, bool mbind,
+
+namespace internal {
+
+struct memory_range {
+    char* start;
+    char* end;
+    unsigned numa_node_id;
+};
+
+struct numa_layout {
+    std::vector<memory_range> ranges;
+};
+
+numa_layout merge(numa_layout one, numa_layout two);
+
+}
+
+internal::numa_layout configure(std::vector<resource::memory> m, bool mbind,
         std::optional<std::string> hugetlbfs_path = {});
 
 // A deprecated alias for set_abort_on_allocation_failure(true).

diff --git a/src/core/memory.cc b/src/core/memory.cc
@@ -167,6 +167,13 @@ __thread volatile int critical_alloc_section = 0;
 
 #endif  // SEASTAR_ENABLE_ALLOC_FAILURE_INJECTION
 
+numa_layout
+merge(numa_layout one, numa_layout two) {
+    // There's no chance to merge, so just concatenate
+    one.ranges.insert(one.ranges.end(), two.ranges.begin(), two.ranges.end());
+    return one;
+}
+
 } // namespace internal
 
 }
@@ -1535,7 +1542,8 @@ void disable_large_allocation_warning() {
     get_cpu_mem().large_allocation_warning_threshold = std::numeric_limits<size_t>::max();
 }
 
-void configure(std::vector<resource::memory> m, bool mbind,
+internal::numa_layout
+configure(std::vector<resource::memory> m, bool mbind,
         optional<std::string> hugetlbfs_path) {
     // we need to make sure cpu_mem is initialize since configure calls cpu_mem.resize
     // and we might reach configure without ever allocating, hence without ever calling
@@ -1545,6 +1553,7 @@ void configure(std::vector<resource::memory> m, bool mbind,
     // verify that here.
     init_cpu_mem();
     is_reactor_thread = true;
+    internal::numa_layout ret_layout;
     size_t total = 0;
     for (auto&& x : m) {
         total += x.bytes;
@@ -1565,7 +1574,8 @@ void configure(std::vector<resource::memory> m, bool mbind,
 #ifdef SEASTAR_HAVE_NUMA
         unsigned long nodemask = 1UL << x.nodeid;
         if (mbind) {
-            auto r = ::mbind(get_cpu_mem().mem() + pos, x.bytes,
+            auto start = get_cpu_mem().mem() + pos;
+            auto r = ::mbind(start, x.bytes,
                             MPOL_PREFERRED,
                             &nodemask, std::numeric_limits<unsigned long>::digits,
                             MPOL_MF_MOVE);
@@ -1576,10 +1586,12 @@ void configure(std::vector<resource::memory> m, bool mbind,
                 std::cerr << "WARNING: unable to mbind shard memory; performance may suffer: "
                         << msg << std::endl;
             }
+            ret_layout.ranges.push_back({.start = start, .end = start + x.bytes, .numa_node_id = x.nodeid});
         }
 #endif
         pos += x.bytes;
     }
+    return ret_layout;
 }
 
 statistics stats() {
@@ -2339,7 +2351,9 @@ reclaimer::~reclaimer() {
 void set_reclaim_hook(std::function<void (std::function<void ()>)> hook) {
 }
 
-void configure(std::vector<resource::memory> m, bool mbind, std::optional<std::string> hugepages_path) {
+internal::numa_layout
+configure(std::vector<resource::memory> m, bool mbind, std::optional<std::string> hugepages_path) {
+    return {};
 }
 
 statistics stats() {