From 86ea0b56edd898f9da9d9d376146bf1c0b89917f Mon Sep 17 00:00:00 2001 From: Christopher Desiniotis Date: Tue, 23 Jan 2024 01:48:25 +0000 Subject: [PATCH] Add support for deploying GDRCopy driver in the driver daemonset --- api/v1/clusterpolicy_types.go | 65 ++++++++++- api/v1/zz_generated.deepcopy.go | 40 +++++++ assets/state-driver/0500_daemonset.yaml | 36 ++++++ ...rator-certified.clusterserviceversion.yaml | 3 + .../manifests/nvidia.com_clusterpolicies.yaml | 47 ++++++++ .../crd/bases/nvidia.com_clusterpolicies.yaml | 47 ++++++++ controllers/object_controls.go | 105 +++++++++++++++++- .../crds/nvidia.com_clusterpolicies_crd.yaml | 47 ++++++++ .../gpu-operator/templates/clusterpolicy.yaml | 23 ++++ deployments/gpu-operator/values.yaml | 10 ++ 10 files changed, 419 insertions(+), 4 deletions(-) diff --git a/api/v1/clusterpolicy_types.go b/api/v1/clusterpolicy_types.go index 04de0ed7a..0e31d6916 100644 --- a/api/v1/clusterpolicy_types.go +++ b/api/v1/clusterpolicy_types.go @@ -69,6 +69,8 @@ type ClusterPolicySpec struct { Validator ValidatorSpec `json:"validator,omitempty"` // GPUDirectStorage defines the spec for GDS components(Experimental) GPUDirectStorage *GPUDirectStorageSpec `json:"gds,omitempty"` + // GDRCopy component spec + GDRCopy *GDRCopySpec `json:"gdrcopy,omitempty"` // SandboxWorkloads defines the spec for handling sandbox workloads (i.e. 
Virtual Machines) SandboxWorkloads SandboxWorkloadsSpec `json:"sandboxWorkloads,omitempty"` // VFIOManager for configuration to deploy VFIO-PCI Manager @@ -1258,6 +1260,53 @@ type GPUDirectStorageSpec struct { Env []EnvVar `json:"env,omitempty"` } +// GDRCopySpec defines the properties for NVIDIA GDRCopy driver (gdrdrv) deployment +type GDRCopySpec struct { + // Enabled indicates if GDRCopy is enabled through GPU Operator + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable GDRCopy through GPU operator" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + Enabled *bool `json:"enabled,omitempty"` + + // NVIDIA GDRCopy driver image repository + // +kubebuilder:validation:Optional + Repository string `json:"repository,omitempty"` + + // NVIDIA GDRCopy driver image name + // +kubebuilder:validation:Pattern=[a-zA-Z0-9\-]+ + Image string `json:"image,omitempty"` + + // NVIDIA GDRCopy driver image tag + // +kubebuilder:validation:Optional + Version string `json:"version,omitempty"` + + // Image pull policy + // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Image Pull Policy" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:imagePullPolicy" + ImagePullPolicy string `json:"imagePullPolicy,omitempty"` + + // Image pull secrets + // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Image pull secrets" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:io.kubernetes:Secret" + 
ImagePullSecrets []string `json:"imagePullSecrets,omitempty"` + + // Optional: List of arguments + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Arguments" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:advanced,urn:alm:descriptor:com.tectonic.ui:text" + Args []string `json:"args,omitempty"` + + // Optional: List of environment variables + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Environment Variables" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:advanced,urn:alm:descriptor:com.tectonic.ui:text" + Env []EnvVar `json:"env,omitempty"` +} + // MIGPartedConfigSpec defines custom mig-parted config for NVIDIA MIG Manager container type MIGPartedConfigSpec struct { // ConfigMap name @@ -1703,6 +1752,9 @@ func ImagePath(spec interface{}) (string, error) { case *GPUDirectStorageSpec: config := spec.(*GPUDirectStorageSpec) return imagePath(config.Repository, config.Image, config.Version, "GDS_IMAGE") + case *GDRCopySpec: + config := spec.(*GDRCopySpec) + return imagePath(config.Repository, config.Image, config.Version, "GDRCOPY_IMAGE") case *VFIOManagerSpec: config := spec.(*VFIOManagerSpec) return imagePath(config.Repository, config.Image, config.Version, "VFIO_MANAGER_IMAGE") @@ -1891,7 +1943,7 @@ func (m *NodeStatusExporterSpec) IsEnabled() bool { return *m.Enabled } -// IsEnabled returns true if GPUDirect RDMA are enabled through gpu-perator +// IsEnabled returns true if GPUDirect RDMA are enabled through gpu-operator func (g *GPUDirectRDMASpec) IsEnabled() bool { if g.Enabled == nil { // GPUDirectRDMA is disabled by default @@ -1900,7 +1952,7 @@ func (g *GPUDirectRDMASpec) IsEnabled() bool { return *g.Enabled 
} -// IsEnabled returns true if GPUDirect Storage are enabled through gpu-perator +// IsEnabled returns true if GPUDirect Storage are enabled through gpu-operator func (gds *GPUDirectStorageSpec) IsEnabled() bool { if gds.Enabled == nil { // GPUDirectStorage is disabled by default @@ -1931,6 +1983,15 @@ func (gds *GPUDirectStorageSpec) IsOpenKernelModulesRequired() bool { return false } +// IsEnabled returns true if GDRCopy is enabled through gpu-operator +func (gdrcopy *GDRCopySpec) IsEnabled() bool { + if gdrcopy.Enabled == nil { + // GDRCopy is disabled by default + return false + } + return *gdrcopy.Enabled +} + // IsEnabled returns true if DCGM hostengine as a separate Pod is enabled through gpu-perator func (dcgm *DCGMSpec) IsEnabled() bool { if dcgm.Enabled == nil { diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 28b108edd..d80b36109 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -195,6 +195,11 @@ func (in *ClusterPolicySpec) DeepCopyInto(out *ClusterPolicySpec) { *out = new(GPUDirectStorageSpec) (*in).DeepCopyInto(*out) } + if in.GDRCopy != nil { + in, out := &in.GDRCopy, &out.GDRCopy + *out = new(GDRCopySpec) + (*in).DeepCopyInto(*out) + } in.SandboxWorkloads.DeepCopyInto(&out.SandboxWorkloads) in.VFIOManager.DeepCopyInto(&out.VFIOManager) in.SandboxDevicePlugin.DeepCopyInto(&out.SandboxDevicePlugin) @@ -717,6 +722,41 @@ func (in *EnvVar) DeepCopy() *EnvVar { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *GDRCopySpec) DeepCopyInto(out *GDRCopySpec) { + *out = *in + if in.Enabled != nil { + in, out := &in.Enabled, &out.Enabled + *out = new(bool) + **out = **in + } + if in.ImagePullSecrets != nil { + in, out := &in.ImagePullSecrets, &out.ImagePullSecrets + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Args != nil { + in, out := &in.Args, &out.Args + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Env != nil { + in, out := &in.Env, &out.Env + *out = make([]EnvVar, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GDRCopySpec. +func (in *GDRCopySpec) DeepCopy() *GDRCopySpec { + if in == nil { + return nil + } + out := new(GDRCopySpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GPUDirectRDMASpec) DeepCopyInto(out *GPUDirectRDMASpec) { *out = *in diff --git a/assets/state-driver/0500_daemonset.yaml b/assets/state-driver/0500_daemonset.yaml index c695fdf41..8ef619740 100644 --- a/assets/state-driver/0500_daemonset.yaml +++ b/assets/state-driver/0500_daemonset.yaml @@ -236,6 +236,42 @@ spec: failureThreshold: 1 successThreshold: 1 timeoutSeconds: 10 + - image: "FILLED BY THE OPERATOR" + imagePullPolicy: IfNotPresent + name: nvidia-gdrcopy-ctr + command: [bash, -xc] + args: ["until [ -d /run/nvidia/driver/usr/src ] && lsmod | grep nvidia; do echo Waiting for nvidia-driver to be installed...; sleep 10; done; exec nvidia-gdrcopy-driver install"] + securityContext: + privileged: true + seLinuxOptions: + level: "s0" + volumeMounts: + - name: run-nvidia + mountPath: /run/nvidia + mountPropagation: HostToContainer + - name: var-log + mountPath: /var/log + - name: dev-log + mountPath: /dev/log + readOnly: true + startupProbe: + exec: + command: + [sh, -c, 'lsmod | grep gdrdrv'] + initialDelaySeconds: 10 + failureThreshold: 120 + successThreshold: 
1 + periodSeconds: 10 + timeoutSeconds: 10 + livenessProbe: + exec: + command: + [sh, -c, 'lsmod | grep gdrdrv'] + periodSeconds: 30 + initialDelaySeconds: 30 + failureThreshold: 1 + successThreshold: 1 + timeoutSeconds: 10 # Only kept when OpenShift DriverToolkit side-car is enabled. - image: "FILLED BY THE OPERATOR" imagePullPolicy: IfNotPresent diff --git a/bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml b/bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml index 815d04eab..3576a31b9 100644 --- a/bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml +++ b/bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml @@ -133,6 +133,9 @@ metadata: }, "gds": { "enabled": false + }, + "gdrcopy": { + "enabled": false } } }, diff --git a/bundle/manifests/nvidia.com_clusterpolicies.yaml b/bundle/manifests/nvidia.com_clusterpolicies.yaml index b46ae27f9..66589a3ea 100644 --- a/bundle/manifests/nvidia.com_clusterpolicies.yaml +++ b/bundle/manifests/nvidia.com_clusterpolicies.yaml @@ -960,6 +960,53 @@ spec: type: string type: object type: object + gdrcopy: + description: GDRCopy component spec + properties: + args: + description: 'Optional: List of arguments' + items: + type: string + type: array + enabled: + description: Enabled indicates if GDRCopy is enabled through GPU + Operator + type: boolean + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. 
+ type: string + required: + - name + type: object + type: array + image: + description: NVIDIA GDRCopy driver image name + pattern: '[a-zA-Z0-9\-]+' + type: string + imagePullPolicy: + description: Image pull policy + type: string + imagePullSecrets: + description: Image pull secrets + items: + type: string + type: array + repository: + description: NVIDIA GDRCopy driver image repository + type: string + version: + description: NVIDIA GDRCopy driver image tag + type: string + type: object gds: description: GPUDirectStorage defines the spec for GDS components(Experimental) properties: diff --git a/config/crd/bases/nvidia.com_clusterpolicies.yaml b/config/crd/bases/nvidia.com_clusterpolicies.yaml index b46ae27f9..66589a3ea 100644 --- a/config/crd/bases/nvidia.com_clusterpolicies.yaml +++ b/config/crd/bases/nvidia.com_clusterpolicies.yaml @@ -960,6 +960,53 @@ spec: type: string type: object type: object + gdrcopy: + description: GDRCopy component spec + properties: + args: + description: 'Optional: List of arguments' + items: + type: string + type: array + enabled: + description: Enabled indicates if GDRCopy is enabled through GPU + Operator + type: boolean + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. 
+ type: string + required: + - name + type: object + type: array + image: + description: NVIDIA GDRCopy driver image name + pattern: '[a-zA-Z0-9\-]+' + type: string + imagePullPolicy: + description: Image pull policy + type: string + imagePullSecrets: + description: Image pull secrets + items: + type: string + type: array + repository: + description: NVIDIA GDRCopy driver image repository + type: string + version: + description: NVIDIA GDRCopy driver image tag + type: string + type: object gds: description: GPUDirectStorage defines the spec for GDS components(Experimental) properties: diff --git a/controllers/object_controls.go b/controllers/object_controls.go index 810623979..aa9fc0297 100644 --- a/controllers/object_controls.go +++ b/controllers/object_controls.go @@ -871,6 +871,12 @@ func TransformDriver(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n C return err } + // update/remove nvidia-gdrcopy sidecar container + err = transformGDRCopyContainer(obj, config, n) + if err != nil { + return err + } + // update/remove OpenShift Driver Toolkit sidecar container err = transformOpenShiftDriverToolkitContainer(obj, config, n, "nvidia-driver-ctr") if err != nil { @@ -2522,6 +2528,85 @@ func transformGDSContainer(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpe return nil } +func transformGDRCopyContainer(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error { + for i, container := range obj.Spec.Template.Spec.Containers { + // skip if not nvidia-gdrcopy + if !strings.HasPrefix(container.Name, "nvidia-gdrcopy") { + continue + } + if config.GDRCopy == nil || !config.GDRCopy.IsEnabled() { + n.rec.Log.Info("GDRCopy is disabled") + // remove nvidia-gdrcopy sidecar container from driver Daemonset if gdrcopy is not enabled + obj.Spec.Template.Spec.Containers = append(obj.Spec.Template.Spec.Containers[:i], obj.Spec.Template.Spec.Containers[i+1:]...) 
+ return nil + } + if config.Driver.UsePrecompiledDrivers() { + return fmt.Errorf("GDRCopy is not supported along with pre-compiled NVIDIA drivers") + } + + gdrcopyContainer := &obj.Spec.Template.Spec.Containers[i] + + // update nvidia-gdrcopy image and pull policy + gdrcopyImage, err := resolveDriverTag(n, config.GDRCopy) + if err != nil { + return err + } + if gdrcopyImage != "" { + gdrcopyContainer.Image = gdrcopyImage + } + if config.GDRCopy.ImagePullPolicy != "" { + gdrcopyContainer.ImagePullPolicy = gpuv1.ImagePullPolicy(config.GDRCopy.ImagePullPolicy) + } + + // set image pull secrets + if len(config.GDRCopy.ImagePullSecrets) > 0 { + addPullSecrets(&obj.Spec.Template.Spec, config.GDRCopy.ImagePullSecrets) + } + + // set/append environment variables for gdrcopy container + if len(config.GDRCopy.Env) > 0 { + for _, env := range config.GDRCopy.Env { + setContainerEnv(gdrcopyContainer, env.Name, env.Value) + } + } + + if config.Driver.RepoConfig != nil && config.Driver.RepoConfig.ConfigMapName != "" { + // note: transformDriverContainer() will have already created a Volume backed by the ConfigMap. + // Only add a VolumeMount for nvidia-gdrcopy-ctr. + destinationDir, err := getRepoConfigPath() + if err != nil { + return fmt.Errorf("ERROR: failed to get destination directory for custom repo config: %w", err) + } + volumeMounts, _, err := createConfigMapVolumeMounts(n, config.Driver.RepoConfig.ConfigMapName, destinationDir) + if err != nil { + return fmt.Errorf("ERROR: failed to create ConfigMap VolumeMounts for custom package repo config: %w", err) + } + gdrcopyContainer.VolumeMounts = append(gdrcopyContainer.VolumeMounts, volumeMounts...) 
+ } + + // set any custom ssl key/certificate configuration provided + if config.Driver.CertConfig != nil && config.Driver.CertConfig.Name != "" { + destinationDir, err := getCertConfigPath() + if err != nil { + return fmt.Errorf("ERROR: failed to get destination directory for ssl key/cert config: %w", err) + } + volumeMounts, _, err := createConfigMapVolumeMounts(n, config.Driver.CertConfig.Name, destinationDir) + if err != nil { + return fmt.Errorf("ERROR: failed to create ConfigMap VolumeMounts for custom certs: %w", err) + } + gdrcopyContainer.VolumeMounts = append(gdrcopyContainer.VolumeMounts, volumeMounts...) + } + + // transform the nvidia-gdrcopy-ctr to use the openshift driver toolkit + // notify openshift driver toolkit container that gdrcopy is enabled + err = transformOpenShiftDriverToolkitContainer(obj, config, n, "nvidia-gdrcopy-ctr") + if err != nil { + return fmt.Errorf("ERROR: failed to transform the Driver Toolkit Container: %w", err) + } + } + return nil +} + // getSanitizedKernelVersion returns kernelVersion with following changes // 1. Remove arch suffix (as we use multi-arch images) and // 2. 
ensure to meet k8s constraints for metadata.name, i.e it @@ -2617,6 +2702,11 @@ func transformOpenShiftDriverToolkitContainer(obj *appsv1.DaemonSet, config *gpu n.rec.Log.V(2).Info("transformOpenShiftDriverToolkitContainer", "GDS_ENABLED", config.GPUDirectStorage.IsEnabled()) } + if config.GDRCopy != nil && config.GDRCopy.IsEnabled() { + setContainerEnv(driverToolkitContainer, "GDRCOPY_ENABLED", "true") + n.rec.Log.V(2).Info("transformOpenShiftDriverToolkitContainer", "GDRCOPY_ENABLED", "true") + } + image := n.ocpDriverToolkit.rhcosDriverToolkitImages[n.ocpDriverToolkit.currentRhcosVersion] if image != "" { driverToolkitContainer.Image = image @@ -2635,13 +2725,18 @@ func transformOpenShiftDriverToolkitContainer(obj *appsv1.DaemonSet, config *gpu } /* prepare the main container to start from the DriverToolkit entrypoint */ - if strings.Contains(mainContainerName, "nvidia-fs") { + switch mainContainerName { + case "nvidia-fs-ctr": mainContainer.Command = []string{"ocp_dtk_entrypoint"} mainContainer.Args = []string{"nv-fs-ctr-run-with-dtk"} - } else { + case "nvidia-gdrcopy-ctr": + mainContainer.Command = []string{"ocp_dtk_entrypoint"} + mainContainer.Args = []string{"gdrcopy-ctr-run-with-dtk"} + default: mainContainer.Command = []string{"ocp_dtk_entrypoint"} mainContainer.Args = []string{"nv-ctr-run-with-dtk"} } + /* prepare the shared volumes */ // shared directory volSharedDirName, volSharedDirPath := "shared-nvidia-driver-toolkit", "/mnt/shared-nvidia-driver-toolkit" @@ -2712,6 +2807,12 @@ func resolveDriverTag(n ClusterPolicyController, driverSpec interface{}) (string if err != nil { return "", err } + case *gpuv1.GDRCopySpec: + spec := driverSpec.(*gpuv1.GDRCopySpec) + image, err = gpuv1.ImagePath(spec) + if err != nil { + return "", err + } default: return "", fmt.Errorf("Invalid type to construct image path: %v", v) } diff --git a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies_crd.yaml 
b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies_crd.yaml index b46ae27f9..66589a3ea 100644 --- a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies_crd.yaml +++ b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies_crd.yaml @@ -960,6 +960,53 @@ spec: type: string type: object type: object + gdrcopy: + description: GDRCopy component spec + properties: + args: + description: 'Optional: List of arguments' + items: + type: string + type: array + enabled: + description: Enabled indicates if GDRCopy is enabled through GPU + Operator + type: boolean + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. + type: string + required: + - name + type: object + type: array + image: + description: NVIDIA GDRCopy driver image name + pattern: '[a-zA-Z0-9\-]+' + type: string + imagePullPolicy: + description: Image pull policy + type: string + imagePullSecrets: + description: Image pull secrets + items: + type: string + type: array + repository: + description: NVIDIA GDRCopy driver image repository + type: string + version: + description: NVIDIA GDRCopy driver image tag + type: string + type: object gds: description: GPUDirectStorage defines the spec for GDS components(Experimental) properties: diff --git a/deployments/gpu-operator/templates/clusterpolicy.yaml b/deployments/gpu-operator/templates/clusterpolicy.yaml index cf015f8c3..655e9d26f 100644 --- a/deployments/gpu-operator/templates/clusterpolicy.yaml +++ b/deployments/gpu-operator/templates/clusterpolicy.yaml @@ -618,6 +618,29 @@ spec: args: {{ toYaml .Values.gds.args | nindent 6 }} {{- end }} {{- end }} + {{- if .Values.gdrcopy }} + gdrcopy: + enabled: {{ .Values.gdrcopy.enabled | default false }} + {{- if .Values.gdrcopy.repository }} + repository: {{ 
.Values.gdrcopy.repository }} + {{- end }} + {{- if .Values.gdrcopy.image }} + image: {{ .Values.gdrcopy.image }} + {{- end }} + version: {{ .Values.gdrcopy.version | quote }} + {{- if .Values.gdrcopy.imagePullPolicy }} + imagePullPolicy: {{ .Values.gdrcopy.imagePullPolicy }} + {{- end }} + {{- if .Values.gdrcopy.imagePullSecrets }} + imagePullSecrets: {{ toYaml .Values.gdrcopy.imagePullSecrets | nindent 8 }} + {{- end }} + {{- if .Values.gdrcopy.env }} + env: {{ toYaml .Values.gdrcopy.env | nindent 6 }} + {{- end }} + {{- if .Values.gdrcopy.args }} + args: {{ toYaml .Values.gdrcopy.args | nindent 6 }} + {{- end }} + {{- end }} sandboxWorkloads: enabled: {{ .Values.sandboxWorkloads.enabled }} {{- if .Values.sandboxWorkloads.defaultWorkload }} diff --git a/deployments/gpu-operator/values.yaml b/deployments/gpu-operator/values.yaml index 1557aab50..8c6de40d8 100644 --- a/deployments/gpu-operator/values.yaml +++ b/deployments/gpu-operator/values.yaml @@ -371,6 +371,16 @@ gds: env: [] args: [] +gdrcopy: + enabled: false + repository: nvcr.io/nvidia/cloud-native + image: gdrdrv + version: "v2.4.1" + imagePullPolicy: IfNotPresent + imagePullSecrets: [] + env: [] + args: [] + vgpuManager: enabled: false repository: ""