diff --git a/cmd/nvidia-ctk/cdi/generate/generate.go b/cmd/nvidia-ctk/cdi/generate/generate.go index b187335b7..853644428 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate.go +++ b/cmd/nvidia-ctk/cdi/generate/generate.go @@ -57,6 +57,7 @@ type options struct { configSearchPaths cli.StringSlice librarySearchPaths cli.StringSlice + disableHooks cli.StringSlice csv struct { files cli.StringSlice @@ -176,6 +177,12 @@ func (m command) build() *cli.Command { Usage: "Specify a pattern the CSV mount specifications.", Destination: &opts.csv.ignorePatterns, }, + &cli.StringSliceFlag{ + Name: "disable-hook", + Usage: "Comma-separated list of hooks to skip when generating the CDI specification.", + Value: cli.NewStringSlice(), + Destination: &opts.disableHooks, + }, } return &c @@ -262,7 +269,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) { deviceNamers = append(deviceNamers, deviceNamer) } - cdilib, err := nvcdi.New( + initOpts := []nvcdi.Option{ nvcdi.WithLogger(m.logger), nvcdi.WithDriverRoot(opts.driverRoot), nvcdi.WithDevRoot(opts.devRoot), @@ -276,7 +283,17 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) { nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()), // We set the following to allow for dependency injection: nvcdi.WithNvmlLib(opts.nvmllib), - ) + } + + if len(opts.disableHooks.Value()) > 0 { + for _, hook := range opts.disableHooks.Value() { + for _, hookName := range nvcdi.NewHookName(hook) { + initOpts = append(initOpts, nvcdi.WithDisabledHook(hookName)) + } + } + } + + cdilib, err := nvcdi.New(initOpts...) 
if err != nil { return nil, fmt.Errorf("failed to create CDI library: %v", err) } diff --git a/cmd/nvidia-ctk/cdi/generate/generate_test.go b/cmd/nvidia-ctk/cdi/generate/generate_test.go index 9e9a6a4e4..09fc56f4e 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate_test.go +++ b/cmd/nvidia-ctk/cdi/generate/generate_test.go @@ -26,6 +26,7 @@ import ( "github.com/NVIDIA/go-nvml/pkg/nvml/mock/dgxa100" testlog "github.com/sirupsen/logrus/hooks/test" "github.com/stretchr/testify/require" + "github.com/urfave/cli/v2" "github.com/NVIDIA/nvidia-container-toolkit/internal/test" ) @@ -36,6 +37,9 @@ func TestGenerateSpec(t *testing.T) { require.NoError(t, err) driverRoot := filepath.Join(moduleRoot, "testdata", "lookup", "rootfs-1") + disableHook1 := cli.NewStringSlice("enable-cuda-compat") + disableHook2 := cli.NewStringSlice("enable-cuda-compat", "update-ldcache") + disableHook3 := cli.NewStringSlice("all") logger, _ := testlog.NewNullLogger() testCases := []struct { @@ -112,6 +116,168 @@ containerEdits: - nosuid - nodev - bind +`, + }, + { + description: "disableHooks1", + options: options{ + format: "yaml", + mode: "nvml", + vendor: "example.com", + class: "device", + driverRoot: driverRoot, + disableHooks: *disableHook1, + }, + expectedOptions: options{ + format: "yaml", + mode: "nvml", + vendor: "example.com", + class: "device", + nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook", + driverRoot: driverRoot, + disableHooks: *disableHook1, + }, + expectedSpec: `--- +cdiVersion: 0.5.0 +kind: example.com/device +devices: + - name: "0" + containerEdits: + deviceNodes: + - path: /dev/nvidia0 + hostPath: {{ .driverRoot }}/dev/nvidia0 + - name: all + containerEdits: + deviceNodes: + - path: /dev/nvidia0 + hostPath: {{ .driverRoot }}/dev/nvidia0 +containerEdits: + env: + - NVIDIA_VISIBLE_DEVICES=void + deviceNodes: + - path: /dev/nvidiactl + hostPath: {{ .driverRoot }}/dev/nvidiactl + hooks: + - hookName: createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - 
create-symlinks + - --link + - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so + - hookName: createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - update-ldcache + - --folder + - /lib/x86_64-linux-gnu + mounts: + - hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77 + containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77 + options: + - ro + - nosuid + - nodev + - bind +`, + }, + { + description: "disableHooks2", + options: options{ + format: "yaml", + mode: "nvml", + vendor: "example.com", + class: "device", + driverRoot: driverRoot, + disableHooks: *disableHook2, + }, + expectedOptions: options{ + format: "yaml", + mode: "nvml", + vendor: "example.com", + class: "device", + nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook", + driverRoot: driverRoot, + disableHooks: *disableHook2, + }, + expectedSpec: `--- +cdiVersion: 0.5.0 +kind: example.com/device +devices: + - name: "0" + containerEdits: + deviceNodes: + - path: /dev/nvidia0 + hostPath: {{ .driverRoot }}/dev/nvidia0 + - name: all + containerEdits: + deviceNodes: + - path: /dev/nvidia0 + hostPath: {{ .driverRoot }}/dev/nvidia0 +containerEdits: + env: + - NVIDIA_VISIBLE_DEVICES=void + deviceNodes: + - path: /dev/nvidiactl + hostPath: {{ .driverRoot }}/dev/nvidiactl + hooks: + - hookName: createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - create-symlinks + - --link + - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so + mounts: + - hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77 + containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77 + options: + - ro + - nosuid + - nodev + - bind +`, + }, + { + description: "disableHooksAll", + options: options{ + format: "yaml", + mode: "nvml", + vendor: "example.com", + class: "device", + driverRoot: driverRoot, + disableHooks: *disableHook3, + }, + expectedOptions: options{ + format: "yaml", + mode: "nvml", + vendor: "example.com", + class: "device", + nvidiaCDIHookPath: 
"/usr/bin/nvidia-cdi-hook", + driverRoot: driverRoot, + disableHooks: *disableHook3, + }, + expectedSpec: `--- +cdiVersion: 0.5.0 +kind: example.com/device +devices: + - name: "0" + containerEdits: + deviceNodes: + - path: /dev/nvidia0 + hostPath: {{ .driverRoot }}/dev/nvidia0 + - name: all + containerEdits: + deviceNodes: + - path: /dev/nvidia0 + hostPath: {{ .driverRoot }}/dev/nvidia0 +containerEdits: + env: + - NVIDIA_VISIBLE_DEVICES=void + deviceNodes: + - path: /dev/nvidiactl + hostPath: {{ .driverRoot }}/dev/nvidiactl `, }, } diff --git a/pkg/nvcdi/api.go b/pkg/nvcdi/api.go index 2988026f3..33596ded5 100644 --- a/pkg/nvcdi/api.go +++ b/pkg/nvcdi/api.go @@ -44,4 +44,33 @@ const ( // HookEnableCudaCompat refers to the hook used to enable CUDA Forward Compatibility. // This was added with v1.17.5 of the NVIDIA Container Toolkit. HookEnableCudaCompat = HookName("enable-cuda-compat") + // HookCreateSymlinks refers to the hook used to create symlinks inside the + // directory path to be mounted into a container. + HookCreateSymlinks = HookName("create-symlinks") + // HookUpdateLDCache refers to the hook used to update the dynamic linker + // cache inside the directory path to be mounted into a container.
+	HookUpdateLDCache = HookName("update-ldcache")
 )
+
+// NewHookName resolves a hook name supplied as a string to the HookName
+// values it selects. The keyword "all" expands to the enable-cuda-compat,
+// create-symlinks and update-ldcache hooks; a single recognized name yields a
+// one-element slice. An empty string yields an empty slice and any other
+// name yields nil, so ranging over either result visits no elements.
+func NewHookName(hookName string) []HookName {
+	switch hookName {
+	case "":
+		return []HookName{}
+	case "all":
+		// The keyword selects all three hooks handled by this function.
+		return []HookName{HookEnableCudaCompat, HookCreateSymlinks, HookUpdateLDCache}
+	case string(HookEnableCudaCompat):
+		return []HookName{HookEnableCudaCompat}
+	case string(HookCreateSymlinks):
+		return []HookName{HookCreateSymlinks}
+	case string(HookUpdateLDCache):
+		return []HookName{HookUpdateLDCache}
+	default:
+		return nil
+	}
+}
diff --git a/pkg/nvcdi/driver-nvml.go b/pkg/nvcdi/driver-nvml.go
index f49f1129b..145209e51 100644
--- a/pkg/nvcdi/driver-nvml.go
+++ b/pkg/nvcdi/driver-nvml.go
@@ -99,21 +99,34 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string) (discover.Discover
 
 	var discoverers []discover.Discover
 
-	driverDotSoSymlinksDiscoverer := discover.WithDriverDotSoSymlinks(
-		libraries,
-		version,
-		l.nvidiaCDIHookPath,
-	)
-	discoverers = append(discoverers, driverDotSoSymlinksDiscoverer)
+	if l.HookIsSupported(HookCreateSymlinks) {
+		driverDotSoSymlinksDiscoverer := discover.WithDriverDotSoSymlinks(
+			libraries,
+			version,
+			l.nvidiaCDIHookPath,
+		)
+		discoverers = append(discoverers, driverDotSoSymlinksDiscoverer)
+	}
 
 	if l.HookIsSupported(HookEnableCudaCompat) {
 		// TODO: The following should use the version directly.
- cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.nvidiaCDIHookPath, l.driver) + cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer( + l.logger, + l.nvidiaCDIHookPath, + l.driver, + ) discoverers = append(discoverers, cudaCompatLibHookDiscoverer) } - updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.nvidiaCDIHookPath, l.ldconfigPath) - discoverers = append(discoverers, updateLDCache) + if l.HookIsSupported(HookUpdateLDCache) { + updateLDCache, _ := discover.NewLDCacheUpdateHook( + l.logger, + libraries, + l.nvidiaCDIHookPath, + l.ldconfigPath, + ) + discoverers = append(discoverers, updateLDCache) + } d := discover.Merge(discoverers...)