Collect container-level GPU metrics using NVML.
When cAdvisor starts up, it reads the `vendor` files in
`/sys/bus/pci/devices/*` to see whether any NVIDIA devices (vendor ID: 0x10de)
are attached to the node. If no NVIDIA devices are found, this code path stays
dormant for the rest of cAdvisor's lifetime. If NVIDIA devices are found, we
start a goroutine that checks for the presence of NVML by trying to dynamically
load it at regular intervals. We check repeatedly rather than just once because
cAdvisor may be started before the NVIDIA drivers and NVML are installed. Once
NVML loads successfully, we use NVML's query methods to find out how many
devices exist on the node, build a map from their minor numbers to their
handles, and cache that map. The goroutine then exits.
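
For reference, this startup probe only reads tiny sysfs files; on a node with a
GPU attached it boils down to something like the following (the PCI address is
just an example):

    $ cat /sys/bus/pci/devices/0000:00:04.0/vendor
    0x10de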

If we detected the presence of NVML in the previous step, then whenever
cAdvisor detects a new container, it reads the `devices.list` file from the
container's devices cgroup. The `devices.list` file lists the major:minor
numbers of all the devices that the container is allowed to access. If we find
any devices with major number 195 (the major number assigned to NVIDIA
devices), we cache the list of corresponding minor numbers for that container.
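
For illustration, the devices cgroup of a container that has been granted a
single GPU might contain entries along these lines (the exact set of entries
varies with the container runtime and configuration):

    c 1:3 rwm
    c 1:5 rwm
    c 195:255 rwm
    c 195:0 rwm

Here only `195:0` identifies an actual GPU; the control device nvidiactl
(`195:255`) is filtered out by the parser added in this change.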

During every housekeeping operation, in addition to collecting all the existing
metrics, we use the cached NVIDIA device minor numbers and the map from minor
numbers to device handles to collect metrics for the GPU devices attached to
the container.
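
The intended call pattern, end to end, can be sketched with just the exported
API added in accelerators/nvidia.go below; the cgroup path and the `main`
scaffolding here are illustrative stand-ins for cAdvisor's real
container-handler and housekeeping wiring:

package main

import (
	"log"

	"github.com/google/cadvisor/accelerators"
	info "github.com/google/cadvisor/info/v1"
)

func main() {
	// Node-level setup: detect NVIDIA PCI devices and, if any are present,
	// start the goroutine that keeps trying to load NVML.
	nm := &accelerators.NvidiaManager{}
	nm.Setup()
	defer nm.Destroy()

	// Per-container setup: point the manager at the container's devices
	// cgroup. The path below is a made-up example.
	collector, err := nm.GetCollector("/sys/fs/cgroup/devices/docker/example-container-id")
	if err != nil {
		log.Printf("could not build GPU collector: %v", err)
	}

	// Per-housekeeping collection: accelerator samples are appended to the
	// container's stats alongside the existing metrics.
	stats := &info.ContainerStats{}
	if err := collector.UpdateStats(stats); err != nil {
		log.Printf("could not update GPU stats: %v", err)
	}
	log.Printf("collected %d accelerator stat(s)", len(stats.Accelerators))
}

Note that GetCollector deliberately returns a usable (empty) collector when
NVML never initialized or the container has no GPUs, so callers can invoke
UpdateStats unconditionally.
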
rohitagarwal003 committed Nov 6, 2017
1 parent 318f28b commit 4a35130
Showing 7 changed files with 491 additions and 2 deletions.
239 changes: 239 additions & 0 deletions accelerators/nvidia.go
@@ -0,0 +1,239 @@
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package accelerators

import (
	"bufio"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	info "github.com/google/cadvisor/info/v1"

	"github.com/golang/glog"
	"github.com/mindprince/gonvml"
)

type NvidiaManager struct {
	// true if the NVML library (libnvidia-ml.so.1) was loaded successfully
	nvmlInitialized bool

	// nvidiaDevices is a map from device minor number to a handle that can be used to get metrics about the device
	nvidiaDevices map[int]gonvml.Device
}

var sysFsPCIDevicesPath = "/sys/bus/pci/devices/"

const nvidiaVendorId = "0x10de"

// Setup initializes NVML if nvidia devices are present on the node.
func (nm *NvidiaManager) Setup() {
	if !detectDevices(nvidiaVendorId) {
		glog.Info("No NVIDIA devices found.")
		return
	}

	go func() {
		glog.Info("Starting goroutine to initialize NVML")
		nm.initializeNVML()
		if nm.nvmlInitialized {
			return
		}
		// TODO: use globalHousekeepingInterval
		for range time.Tick(time.Minute) {
			nm.initializeNVML()
			if nm.nvmlInitialized {
				return
			}
		}
	}()
}

// detectDevices returns true if a device with the given PCI vendor id is present on the node.
func detectDevices(vendorId string) bool {
	devices, err := ioutil.ReadDir(sysFsPCIDevicesPath)
	if err != nil {
		glog.Warningf("error reading %q: %v", sysFsPCIDevicesPath, err)
		return false
	}

	for _, device := range devices {
		vendorPath := filepath.Join(sysFsPCIDevicesPath, device.Name(), "vendor")
		content, err := ioutil.ReadFile(vendorPath)
		if err != nil {
			glog.Infof("Error while reading %q: %v", vendorPath, err)
			continue
		}
		if strings.EqualFold(strings.TrimSpace(string(content)), vendorId) {
			glog.Infof("Found device with vendorId %q", vendorId)
			return true
		}
	}
	return false
}

// initializeNVML initializes the NVML library and sets up the nvidiaDevices map.
func (nm *NvidiaManager) initializeNVML() {
	if err := gonvml.Initialize(); err != nil {
		// This is under a logging level because otherwise we may cause
		// log spam if the drivers/NVML are not installed on the system.
		glog.V(3).Infof("Could not initialize NVML: %v", err)
		return
	}
	nm.nvmlInitialized = true
	numDevices, err := gonvml.DeviceCount()
	if err != nil {
		glog.Warningf("GPU metrics would not be available. Failed to get the number of nvidia devices: %v", err)
		return
	}
	glog.Infof("NVML initialized. Number of nvidia devices: %v", numDevices)
	nm.nvidiaDevices = make(map[int]gonvml.Device, numDevices)
	for i := 0; i < int(numDevices); i++ {
		device, err := gonvml.DeviceHandleByIndex(uint(i))
		if err != nil {
			glog.Warningf("Failed to get nvidia device handle %d: %v", i, err)
			continue
		}
		minorNumber, err := device.MinorNumber()
		if err != nil {
			glog.Warningf("Failed to get nvidia device minor number: %v", err)
			continue
		}
		nm.nvidiaDevices[int(minorNumber)] = device
	}
}

// Destroy shuts down NVML.
func (nm *NvidiaManager) Destroy() {
	if nm.nvmlInitialized {
		gonvml.Shutdown()
	}
}

// GetCollector returns a collector that can fetch nvidia gpu metrics for nvidia devices
// present in the devices.list file in the given devicesCgroupPath.
func (nm *NvidiaManager) GetCollector(devicesCgroupPath string) (AcceleratorCollector, error) {
	nc := &NvidiaCollector{}
	if !nm.nvmlInitialized || len(nm.nvidiaDevices) == 0 {
		return nc, nil
	}
	nvidiaMinorNumbers, err := parseDevicesCgroup(devicesCgroupPath)
	if err != nil {
		return nc, err
	}
	for _, minor := range nvidiaMinorNumbers {
		device, ok := nm.nvidiaDevices[minor]
		if !ok {
			return nc, fmt.Errorf("nvidia device minor number %d not found in cached devices", minor)
		}
		nc.Devices = append(nc.Devices, device)
	}
	return nc, nil
}

// parseDevicesCgroup parses the devices cgroup devices.list file for the container
// and returns a list of minor numbers corresponding to NVIDIA GPU devices that the
// container is allowed to access. In cases where the container has access to all
// devices or all NVIDIA devices but the devices are not enumerated separately in
// the devices.list file, we return an empty list.
// This is defined as a variable to help in testing.
var parseDevicesCgroup = func(devicesCgroupPath string) ([]int, error) {
	// Always return a non-nil slice
	nvidiaMinorNumbers := []int{}

	devicesList := filepath.Join(devicesCgroupPath, "devices.list")
	f, err := os.Open(devicesList)
	if err != nil {
		return nvidiaMinorNumbers, fmt.Errorf("error while opening devices cgroup file %q: %v", devicesList, err)
	}
	defer f.Close()

	s := bufio.NewScanner(f)

	// See https://www.kernel.org/doc/Documentation/cgroup-v1/devices.txt for the file format
	for s.Scan() {
		text := s.Text()

		fields := strings.Fields(text)
		if len(fields) != 3 {
			return nvidiaMinorNumbers, fmt.Errorf("invalid devices cgroup entry %q: must contain three whitespace-separated fields", text)
		}

		// Split the second field to find out major:minor numbers
		majorMinor := strings.Split(fields[1], ":")
		if len(majorMinor) != 2 {
			return nvidiaMinorNumbers, fmt.Errorf("invalid devices cgroup entry %q: second field should have one colon", text)
		}

		// NVIDIA graphics devices are character devices with major number 195.
		// https://github.com/torvalds/linux/blob/v4.13/Documentation/admin-guide/devices.txt#L2583
		if fields[0] == "c" && majorMinor[0] == "195" {
			minorNumber, err := strconv.Atoi(majorMinor[1])
			if err != nil {
				return nvidiaMinorNumbers, fmt.Errorf("invalid devices cgroup entry %q: minor number is not integer", text)
			}
			// We don't want devices like nvidiactl (195:255) and nvidia-modeset (195:254)
			if minorNumber < 128 {
				nvidiaMinorNumbers = append(nvidiaMinorNumbers, minorNumber)
			}
			// We are ignoring the "195:*" case
			// where the container has access to all NVIDIA devices on the machine.
		}
		// We are ignoring the "*:*" case
		// where the container has access to all devices on the machine.
	}
	return nvidiaMinorNumbers, nil
}

type NvidiaCollector struct {
	// Exposed for testing
	Devices []gonvml.Device
}

// UpdateStats updates the stats for NVIDIA GPUs (if any) attached to the container.
func (nc *NvidiaCollector) UpdateStats(stats *info.ContainerStats) error {
	for _, device := range nc.Devices {
		model, err := device.Name()
		if err != nil {
			return fmt.Errorf("error while getting gpu name: %v", err)
		}
		uuid, err := device.UUID()
		if err != nil {
			return fmt.Errorf("error while getting gpu uuid: %v", err)
		}
		memoryTotal, memoryUsed, err := device.MemoryInfo()
		if err != nil {
			return fmt.Errorf("error while getting gpu memory info: %v", err)
		}
		// TODO: Use housekeepingInterval
		utilizationGPU, err := device.AverageGPUUtilization(10 * time.Second)
		if err != nil {
			return fmt.Errorf("error while getting gpu utilization: %v", err)
		}

		stats.Accelerators = append(stats.Accelerators, info.AcceleratorStats{
			Make:        "nvidia",
			Model:       model,
			ID:          uuid,
			MemoryTotal: memoryTotal,
			MemoryUsed:  memoryUsed,
			DutyCycle:   uint64(utilizationGPU),
		})
	}
	return nil
}
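
The remaining files in this commit are not reproduced in this excerpt, but the
two declarations that nvidia.go depends on can be inferred from how it uses
them. A minimal sketch, with names taken from the usage above and everything
else (doc comments, struct tags) assumed rather than copied:

// Sketch: inferred from accelerators/nvidia.go above, not copied from the
// other changed files; presumably lives alongside NvidiaManager in package accelerators.
type AcceleratorCollector interface {
	// UpdateStats appends accelerator metrics for one container to its stats.
	UpdateStats(*info.ContainerStats) error
}

and, in info/v1, the per-device sample that UpdateStats appends:

// Sketch: field names and types inferred from the composite literal in
// UpdateStats above; the JSON tags are assumptions.
type AcceleratorStats struct {
	// Make of the accelerator, e.g. "nvidia".
	Make string `json:"make"`
	// Model of the accelerator.
	Model string `json:"model"`
	// ID of the accelerator; for NVIDIA GPUs this is the device UUID.
	ID string `json:"id"`
	// Total and used accelerator memory, in bytes, as reported by NVML.
	MemoryTotal uint64 `json:"memory_total"`
	MemoryUsed  uint64 `json:"memory_used"`
	// Percentage of time the accelerator was actively processing over the sample period.
	DutyCycle uint64 `json:"duty_cycle"`
}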