From 1af5d798a4d10a07e995d420a759f2fe752b583a Mon Sep 17 00:00:00 2001 From: Alex Wied Date: Fri, 15 Jul 2022 00:14:29 -0400 Subject: [PATCH 1/2] libstore/globals.cc: Automatically set cores based on cgroup CPU limit By default, Nix sets the "cores" setting to the number of CPUs which are physically present on the machine. If cgroups are used to limit the CPU and memory consumption of a large Nix build, the OOM killer may be invoked. For example, consider a GitLab CI pipeline which builds a large software package. The GitLab runner spawns a container whose CPU is limited to 4 cores and whose memory is limited to 16 GiB. If the underlying machine has 64 cores, Nix will invoke the build with -j64. In many cases, that level of parallelism will invoke the OOM killer and the build will completely fail. This change sets the default value of "cores" to be ceil(cpu_quota / cpu_period), with a fallback to std::thread::hardware_concurrency() if cgroups v2 is not detected. --- src/libstore/globals.cc | 50 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/src/libstore/globals.cc b/src/libstore/globals.cc index 0f2ca4b15ca..48df839fadb 100644 --- a/src/libstore/globals.cc +++ b/src/libstore/globals.cc @@ -11,6 +11,11 @@ #include #include +#if __linux__ +#include +#include +#endif + #include @@ -114,7 +119,50 @@ std::vector getUserConfigFiles() unsigned int Settings::getDefaultCores() { - return std::max(1U, std::thread::hardware_concurrency()); + unsigned int concurrency = std::max(1U, std::thread::hardware_concurrency()); + + #if __linux__ + FILE *fp = fopen("/proc/mounts", "r"); + if (!fp) + return concurrency; + + Strings cgPathParts; + + struct mntent *ent; + while ((ent = getmntent(fp))) { + std::string mountType, mountPath; + + mountType = ent->mnt_type; + mountPath = ent->mnt_dir; + + if (mountType == "cgroup2") { + cgPathParts.push_back(mountPath); + break; + } + } + + fclose(fp); + + if (cgPathParts.size() > 0 && pathExists("/proc/self/cgroup")) { + std::string currentCgroup = readFile("/proc/self/cgroup"); + Strings cgValues = tokenizeString(currentCgroup, ":"); + cgPathParts.push_back(trim(cgValues.back(), "\n")); + cgPathParts.push_back("cpu.max"); + std::string fullCgPath = canonPath(concatStringsSep("/", cgPathParts)); + + if (pathExists(fullCgPath)) { + std::string cpuMax = readFile(fullCgPath); + std::vector cpuMaxParts = tokenizeString>(cpuMax, " "); + std::string quota = cpuMaxParts[0]; + std::string period = trim(cpuMaxParts[1], "\n"); + + if (quota != "max") + concurrency = std::ceil(std::stoi(quota) / std::stof(period)); + } + } + #endif + + return concurrency; } StringSet Settings::getDefaultSystemFeatures() From 722de8ddcc875c7e8e9a228f9d88454bae31fd40 Mon Sep 17 00:00:00 2001 From: Alex Wied Date: Tue, 19 Jul 2022 02:09:46 -0400 Subject: [PATCH 2/2] libstore/globals.cc: Move cgroup detection to libutil --- src/libstore/globals.cc | 54 +++++------------------------------------ src/libutil/util.cc | 51 ++++++++++++++++++++++++++++++++++++++ src/libutil/util.hh | 3 +++ 3 files changed, 60 insertions(+), 48 deletions(-) diff --git a/src/libstore/globals.cc b/src/libstore/globals.cc index 48df839fadb..d724897bb1d 100644 --- a/src/libstore/globals.cc +++ b/src/libstore/globals.cc @@ -11,11 +11,6 @@ #include #include -#if __linux__ -#include -#include -#endif - #include @@ -119,50 +114,13 @@ std::vector getUserConfigFiles() unsigned int Settings::getDefaultCores() { - unsigned int concurrency = std::max(1U, std::thread::hardware_concurrency()); - - #if __linux__ - FILE *fp = fopen("/proc/mounts", "r"); - if (!fp) - return concurrency; - - Strings cgPathParts; - - struct mntent *ent; - while ((ent = getmntent(fp))) { - std::string mountType, mountPath; - - mountType = ent->mnt_type; - mountPath = ent->mnt_dir; - - if (mountType == "cgroup2") { - cgPathParts.push_back(mountPath); - break; - } - } - - fclose(fp); - - if (cgPathParts.size() > 0 && pathExists("/proc/self/cgroup")) { - std::string currentCgroup = readFile("/proc/self/cgroup"); - Strings cgValues = tokenizeString(currentCgroup, ":"); - cgPathParts.push_back(trim(cgValues.back(), "\n")); - cgPathParts.push_back("cpu.max"); - std::string fullCgPath = canonPath(concatStringsSep("/", cgPathParts)); - - if (pathExists(fullCgPath)) { - std::string cpuMax = readFile(fullCgPath); - std::vector cpuMaxParts = tokenizeString>(cpuMax, " "); - std::string quota = cpuMaxParts[0]; - std::string period = trim(cpuMaxParts[1], "\n"); - - if (quota != "max") - concurrency = std::ceil(std::stoi(quota) / std::stof(period)); - } - } - #endif + const unsigned int concurrency = std::max(1U, std::thread::hardware_concurrency()); + const unsigned int maxCPU = getMaxCPU(); - return concurrency; + if (maxCPU > 0) + return maxCPU; + else + return concurrency; } StringSet Settings::getDefaultSystemFeatures() diff --git a/src/libutil/util.cc b/src/libutil/util.cc index 28df30fefbb..be6fe091f4c 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -35,6 +35,9 @@ #ifdef __linux__ #include #include + +#include +#include #endif @@ -788,7 +791,55 @@ void drainFD(int fd, Sink & sink, bool block) } } +////////////////////////////////////////////////////////////////////// + +unsigned int getMaxCPU() +{ + #if __linux__ + try { + FILE *fp = fopen("/proc/mounts", "r"); + if (!fp) + return 0; + Strings cgPathParts; + + struct mntent *ent; + while ((ent = getmntent(fp))) { + std::string mountType, mountPath; + + mountType = ent->mnt_type; + mountPath = ent->mnt_dir; + + if (mountType == "cgroup2") { + cgPathParts.push_back(mountPath); + break; + } + } + + fclose(fp); + + if (cgPathParts.size() > 0 && pathExists("/proc/self/cgroup")) { + std::string currentCgroup = readFile("/proc/self/cgroup"); + Strings cgValues = tokenizeString(currentCgroup, ":"); + cgPathParts.push_back(trim(cgValues.back(), "\n")); + cgPathParts.push_back("cpu.max"); + std::string fullCgPath = canonPath(concatStringsSep("/", cgPathParts)); + + if (pathExists(fullCgPath)) { + std::string cpuMax = readFile(fullCgPath); + std::vector cpuMaxParts = tokenizeString>(cpuMax, " "); + std::string quota = cpuMaxParts[0]; + std::string period = trim(cpuMaxParts[1], "\n"); + + if (quota != "max") + return std::ceil(std::stoi(quota) / std::stof(period)); + } + } + } catch (Error &) { ignoreException(); } + #endif + + return 0; +} ////////////////////////////////////////////////////////////////////// diff --git a/src/libutil/util.hh b/src/libutil/util.hh index d3ed15b0b07..29227ecc6e4 100644 --- a/src/libutil/util.hh +++ b/src/libutil/util.hh @@ -182,6 +182,9 @@ std::string drainFD(int fd, bool block = true, const size_t reserveSize=0); void drainFD(int fd, Sink & sink, bool block = true); +/* If cgroups are active, attempt to calculate the number of CPUs available. + If cgroups are unavailable or if cpu.max is set to "max", return 0. */ +unsigned int getMaxCPU(); /* Automatic cleanup of resources. */