From f65c1f6df21b6f55e050fa3a59c1d309c908a243 Mon Sep 17 00:00:00 2001
From: David Grove
Date: Wed, 14 May 2025 14:07:02 -0400
Subject: [PATCH 1/2] doc fix: missed a 2.16 ==> 2.19 update

---
 setup.RHOAI-v2.19/CLUSTER-SETUP.md | 5 ++---
 setup.tmpl/CLUSTER-SETUP.md.tmpl   | 2 +-
 setup.tmpl/RHOAI-v2.16.yaml        | 1 +
 setup.tmpl/RHOAI-v2.17.yaml        | 6 ------
 setup.tmpl/RHOAI-v2.19.yaml        | 1 +
 5 files changed, 5 insertions(+), 10 deletions(-)
 delete mode 100644 setup.tmpl/RHOAI-v2.17.yaml

diff --git a/setup.RHOAI-v2.19/CLUSTER-SETUP.md b/setup.RHOAI-v2.19/CLUSTER-SETUP.md
index 87046a6..6c20fcf 100644
--- a/setup.RHOAI-v2.19/CLUSTER-SETUP.md
+++ b/setup.RHOAI-v2.19/CLUSTER-SETUP.md
@@ -46,7 +46,7 @@ oc get ip -n redhat-ods-operator
 ```
 ```
 NAMESPACE             NAME            CSV                     APPROVAL   APPROVED
-redhat-ods-operator   install-kmh8w   rhods-operator.2.16.0   Manual     false
+redhat-ods-operator   install-kmh8w   rhods-operator.2.19.0   Manual     false
 ```
 Approve install plan replacing the generated plan name below with the actual
 value:
@@ -73,7 +73,7 @@ AI configuration as follows:
   - `manageJobsWithoutQueueName` is enabled,
   - `batch/job` integration is disabled,
   - `waitForPodsReady` is disabled,
-  - `LendingLimit` feature gate is enabled,
+  - `VisibilityOnDemand` feature gate is disabled,
   - `fairSharing` is enabled,
   - `enableClusterQueueResources` metrics is enabled,
 - Codeflare operator:
@@ -82,7 +82,6 @@ AI configuration as follows:
   - `schedulerName` is set to `scheduler-plugins-scheduler`,
   - `queueName` is set to `default-queue`,
   - `slackQueueName` is set to `slack-cluster-queue`
-- pod priorities, resource requests and limits have been adjusted.
 
 
 
diff --git a/setup.tmpl/CLUSTER-SETUP.md.tmpl b/setup.tmpl/CLUSTER-SETUP.md.tmpl
index 1cb3f8d..8af0cd4 100644
--- a/setup.tmpl/CLUSTER-SETUP.md.tmpl
+++ b/setup.tmpl/CLUSTER-SETUP.md.tmpl
@@ -78,7 +78,7 @@ Identify install plan:
 ```
 ```
 NAMESPACE             NAME            CSV                     APPROVAL   APPROVED
-redhat-ods-operator   install-kmh8w   rhods-operator.2.16.0   Manual     false
+redhat-ods-operator   install-kmh8w   rhods-operator.{{ .VERSION_NUMBER }}   Manual     false
 ```
 Approve install plan replacing the generated plan name below with the actual
 value:
diff --git a/setup.tmpl/RHOAI-v2.16.yaml b/setup.tmpl/RHOAI-v2.16.yaml
index 17cff67..4fa393b 100644
--- a/setup.tmpl/RHOAI-v2.16.yaml
+++ b/setup.tmpl/RHOAI-v2.16.yaml
@@ -2,5 +2,6 @@
 
 RHOAI: true
 VERSION: RHOAI-v2.16
+VERSION_NUMBER: 2.16.0
 KUBECTL: oc
 FAIRSHARE: true
diff --git a/setup.tmpl/RHOAI-v2.17.yaml b/setup.tmpl/RHOAI-v2.17.yaml
deleted file mode 100644
index c243c3c..0000000
--- a/setup.tmpl/RHOAI-v2.17.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-# Values for RHOAI 2.17
-
-RHOAI: true
-VERSION: RHOAI-v2.17
-KUBECTL: oc
-FAIRSHARE: true
diff --git a/setup.tmpl/RHOAI-v2.19.yaml b/setup.tmpl/RHOAI-v2.19.yaml
index 0b54073..ba5c840 100644
--- a/setup.tmpl/RHOAI-v2.19.yaml
+++ b/setup.tmpl/RHOAI-v2.19.yaml
@@ -2,5 +2,6 @@
 
 RHOAI: true
 VERSION: RHOAI-v2.19
+VERSION_NUMBER: 2.19.0
 KUBECTL: oc
 FAIRSHARE: true

From d3b2691f3ddfc723454df4e419a2710aaa17e53a Mon Sep 17 00:00:00 2001
From: David Grove
Date: Wed, 14 May 2025 15:30:34 -0400
Subject: [PATCH 2/2] Updated instructions and configurations for RHOAI 2.19

---
 setup.RHOAI-v2.16/CLUSTER-SETUP.md            |   3 -
 setup.RHOAI-v2.19/CLUSTER-SETUP.md            |   3 -
 setup.RHOAI-v2.19/UPGRADE-FAST.md             |  26 ++-
 setup.RHOAI-v2.19/UPGRADE-STABLE.md           |  26 ++-
 setup.RHOAI-v2.19/mlbatch-subscription.yaml   | 166 +++---------------
 .../mlbatch-upgrade-configmaps.yaml           | 145 +++++++++++++++
 .../mlbatch-upgrade-fast-subscription.yaml    |  34 ++++
 .../mlbatch-upgrade-stable-subscription.yaml  |  34 ++++
 setup.k8s/CLUSTER-SETUP.md                    |   1 -
 setup.k8s/kueue/kustomization.yaml            |   7 -
 setup.tmpl/CLUSTER-SETUP.md.tmpl              |  20 +--
 11 files changed, 284 insertions(+), 181 deletions(-)
 create mode 100644 setup.RHOAI-v2.19/mlbatch-upgrade-configmaps.yaml
 create mode 100644 setup.RHOAI-v2.19/mlbatch-upgrade-fast-subscription.yaml
 create mode 100644 setup.RHOAI-v2.19/mlbatch-upgrade-stable-subscription.yaml

diff --git a/setup.RHOAI-v2.16/CLUSTER-SETUP.md b/setup.RHOAI-v2.16/CLUSTER-SETUP.md
index a4fcc0a..b6ab4a9 100644
--- a/setup.RHOAI-v2.16/CLUSTER-SETUP.md
+++ b/setup.RHOAI-v2.16/CLUSTER-SETUP.md
@@ -82,9 +82,6 @@ AI configuration as follows:
   - `schedulerName` is set to `scheduler-plugins-scheduler`,
   - `queueName` is set to `default-queue`,
   - `slackQueueName` is set to `slack-cluster-queue`
-- pod priorities, resource requests and limits have been adjusted.
-
-
 
 ## Autopilot
 
diff --git a/setup.RHOAI-v2.19/CLUSTER-SETUP.md b/setup.RHOAI-v2.19/CLUSTER-SETUP.md
index 6c20fcf..038ac3e 100644
--- a/setup.RHOAI-v2.19/CLUSTER-SETUP.md
+++ b/setup.RHOAI-v2.19/CLUSTER-SETUP.md
@@ -73,7 +73,6 @@ AI configuration as follows:
   - `manageJobsWithoutQueueName` is enabled,
   - `batch/job` integration is disabled,
   - `waitForPodsReady` is disabled,
-  - `VisibilityOnDemand` feature gate is disabled,
   - `fairSharing` is enabled,
   - `enableClusterQueueResources` metrics is enabled,
 - Codeflare operator:
@@ -83,8 +82,6 @@ AI configuration as follows:
   - `schedulerName` is set to `scheduler-plugins-scheduler`,
   - `queueName` is set to `default-queue`,
   - `slackQueueName` is set to `slack-cluster-queue`
-
-
 ## Autopilot
 Helm charts values and how-to for customization can be found [in the official documentation](https://github.com/IBM/autopilot/blob/main/helm-charts/autopilot/README.md).
 As-is, Autopilot will run on GPU nodes.
diff --git a/setup.RHOAI-v2.19/UPGRADE-FAST.md b/setup.RHOAI-v2.19/UPGRADE-FAST.md
index 06db6ab..5a52710 100644
--- a/setup.RHOAI-v2.19/UPGRADE-FAST.md
+++ b/setup.RHOAI-v2.19/UPGRADE-FAST.md
@@ -18,12 +18,28 @@ install-kpzzl   rhods-operator.2.18.0   Manual     false
 install-nqrbp   rhods-operator.2.19.0   Manual     true
 ```
 
-Assuming the install plan exists you can begin the upgrade process.
+Before approving the upgrade, you must manually remove the v1alpha1 MultiKueue CRDs
+from your cluster. These CRDs were replaced by v1beta1 versions in the Kueue 0.9 release,
+but the RHOAI operator will not remove the outdated CRDs automatically.
+First, ensure there are no remaining instances:
+```sh
+kubectl get multikueueclusters.kueue.x-k8s.io --all-namespaces
+kubectl get multikueueconfigs.kueue.x-k8s.io --all-namespaces
+```
+Delete any instances you find, then delete the CRDs:
+```sh
+kubectl delete crd multikueueclusters.kueue.x-k8s.io
+kubectl delete crd multikueueconfigs.kueue.x-k8s.io
+```
+
+Next, update the MLBatch modifications to the default RHOAI configuration maps and subscription:
+```sh
+oc apply -f setup.RHOAI-v2.19/mlbatch-upgrade-configmaps.yaml
+oc apply -f setup.RHOAI-v2.19/mlbatch-upgrade-fast-subscription.yaml
+```
 
-There are no MLBatch modifications to the default RHOAI configuration maps
-beyond those already made in previous installs. Therefore, you can simply
-approve the install plan replacing the example plan name below with the actual
-value on your cluster:
+Finally, you can approve the install plan, replacing the example plan name below
+with the actual value on your cluster:
 ```sh
 oc patch ip -n redhat-ods-operator --type merge --patch '{"spec":{"approved":true}}' install-kpzzl
 ```
diff --git a/setup.RHOAI-v2.19/UPGRADE-STABLE.md b/setup.RHOAI-v2.19/UPGRADE-STABLE.md
index 10a4cf5..a332ea4 100644
--- a/setup.RHOAI-v2.19/UPGRADE-STABLE.md
+++ b/setup.RHOAI-v2.19/UPGRADE-STABLE.md
@@ -21,10 +21,28 @@ install-nqrbp   rhods-operator.2.19.0   Manual     true
 
 Assuming the install plan exists you can begin the upgrade process.
 
-There are no MLBatch modifications to the default RHOAI configuration maps
-beyond those already made in previous installs. Therefore, you can simply
-approve the install plan replacing the example plan name below with the actual
-value on your cluster:
+Before approving the upgrade, you must manually remove the v1alpha1 MultiKueue CRDs
+from your cluster. These CRDs were replaced by v1beta1 versions in the Kueue 0.9 release,
+but the RHOAI operator will not remove the outdated CRDs automatically.
+First, ensure there are no remaining instances:
+```sh
+kubectl get multikueueclusters.kueue.x-k8s.io --all-namespaces
+kubectl get multikueueconfigs.kueue.x-k8s.io --all-namespaces
+```
+Delete any instances you find, then delete the CRDs:
+```sh
+kubectl delete crd multikueueclusters.kueue.x-k8s.io
+kubectl delete crd multikueueconfigs.kueue.x-k8s.io
+```
+
+Next, update the MLBatch modifications to the default RHOAI configuration maps and subscription:
+```sh
+oc apply -f setup.RHOAI-v2.19/mlbatch-upgrade-configmaps.yaml
+oc apply -f setup.RHOAI-v2.19/mlbatch-upgrade-stable-subscription.yaml
+```
+
+Finally, you can approve the install plan, replacing the example plan name below
+with the actual value on your cluster:
 ```sh
 oc patch ip -n redhat-ods-operator --type merge --patch '{"spec":{"approved":true}}' install-kpzzl
 ```
diff --git a/setup.RHOAI-v2.19/mlbatch-subscription.yaml b/setup.RHOAI-v2.19/mlbatch-subscription.yaml
index e667279..d47343d 100644
--- a/setup.RHOAI-v2.19/mlbatch-subscription.yaml
+++ b/setup.RHOAI-v2.19/mlbatch-subscription.yaml
@@ -16,84 +16,6 @@ metadata:
 ---
 apiVersion: v1
 kind: ConfigMap
-metadata:
-  name: mlbatch-codeflare
-  namespace: redhat-ods-operator
-data:
-  manager.yaml: |
-    apiVersion: apps/v1
-    kind: Deployment
-    metadata:
-      name: manager
-      namespace: system
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: codeflare-operator
-          app.kubernetes.io/part-of: codeflare
-      replicas: 1
-      template:
-        metadata:
-          annotations:
-            kubectl.kubernetes.io/default-container: manager
-          labels:
-            app.kubernetes.io/name: codeflare-operator
-            app.kubernetes.io/part-of: codeflare
-        spec:
-          priorityClassName: system-node-critical
-          securityContext:
-            runAsNonRoot: true
-            # TODO(user): For common cases that do not require escalating privileges
-            # it is recommended to ensure that all your Pods/Containers are restrictive.
-            # More info: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted
-            # Please uncomment the following code if your project does NOT have to work on old Kubernetes
-            # versions < 1.20 or on vendors versions which do NOT support this field by default (i.e. Openshift < 4.11 ).
-            # seccompProfile:
-            #   type: RuntimeDefault
-          containers:
-          - command:
-            - /manager
-            image: $(codeflare_operator_controller_image)
-            imagePullPolicy: Always
-            name: manager
-            securityContext:
-              allowPrivilegeEscalation: false
-              capabilities:
-                drop:
-                  - "ALL"
-            env:
-              - name: NAMESPACE
-                valueFrom:
-                  fieldRef:
-                    fieldPath: metadata.namespace
-            ports:
-              - containerPort: 8080
-                protocol: TCP
-                name: metrics
-            livenessProbe:
-              httpGet:
-                path: /healthz
-                port: 8081
-              initialDelaySeconds: 15
-              periodSeconds: 20
-            readinessProbe:
-              httpGet:
-                path: /readyz
-                port: 8081
-              initialDelaySeconds: 5
-              periodSeconds: 10
-            resources:
-              limits:
-                cpu: "1"
-                memory: 1Gi
-              requests:
-                cpu: "1"
-                memory: 1Gi
-          serviceAccountName: controller-manager
-          terminationGracePeriodSeconds: 10
----
-apiVersion: v1
-kind: ConfigMap
 metadata:
   name: codeflare-operator-config
   namespace: redhat-ods-applications
@@ -129,25 +51,6 @@ data:
 ---
 apiVersion: v1
 kind: ConfigMap
-metadata:
-  name: mlbatch-kuberay
-  namespace: redhat-ods-operator
-data:
-  kuberay-operator-image-patch.yaml: |
-    apiVersion: apps/v1
-    kind: Deployment
-    metadata:
-      name: kuberay-operator
-    spec:
-      template:
-        spec:
-          priorityClassName: system-node-critical
-          containers:
-          - name: kuberay-operator
-            image: $(image)
----
-apiVersion: v1
-kind: ConfigMap
 metadata:
   name: mlbatch-kueue
   namespace: redhat-ods-operator
@@ -158,7 +61,7 @@ data:
     health:
       healthProbeBindAddress: :8081
     metrics:
-      bindAddress: :8080
+      bindAddress: :8443
       enableClusterQueueResources: true
     webhook:
      port: 9443
@@ -171,6 +74,7 @@ data:
         Pod: 5
         Workload.kueue.x-k8s.io: 5
         LocalQueue.kueue.x-k8s.io: 1
+        Cohort.kueue.x-k8s.io: 1
         ClusterQueue.kueue.x-k8s.io: 1
         ResourceFlavor.kueue.x-k8s.io: 1
     clientConnection:
@@ -181,6 +85,9 @@ data:
       enable: false
       blockAdmission: false
     manageJobsWithoutQueueName: true
+    #managedJobsNamespaceSelector:
+    #  matchLabels:
+    #    kueue-managed: "true"
     #internalCertManagement:
     #  enable: false
     #  webhookServiceName: ""
@@ -198,6 +105,8 @@ data:
       - "kubeflow.org/tfjob"
      - "kubeflow.org/xgboostjob"
      # - "pod"
+      # - "deployment" # requires enabling pod integration
+      # - "statefulset" # requires enabling pod integration
      externalFrameworks:
      - "AppWrapper.v1beta2.workload.codeflare.dev"
      # podOptions:
@@ -209,31 +118,14 @@ data:
     fairSharing:
       enable: true
       preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare]
-  manager_config_patch.yaml: |
-    apiVersion: apps/v1
-    kind: Deployment
-    metadata:
-      name: controller-manager
-      namespace: system
-    spec:
-      template:
-        spec:
-          priorityClassName: system-node-critical
-          containers:
-          - name: manager
-            image: $(image)
-            args:
-            - "--config=/controller_manager_config.yaml"
-            - "--zap-log-level=2"
-            - "--feature-gates=LendingLimit=true"
-            volumeMounts:
-            - name: manager-config
-              mountPath: /controller_manager_config.yaml
-              subPath: controller_manager_config.yaml
-          volumes:
-          - name: manager-config
-            configMap:
-              name: manager-config
+    #resources:
+    #  excludeResourcePrefixes: []
+    #  transformations:
+    #  - input: nvidia.com/mig-4g.5gb
+    #    strategy: Replace | Retain
+    #    outputs:
+    #      example.com/accelerator-memory: 5Gi
+    #      example.com/accelerator-gpc: 4
 ---
 apiVersion: v1
 kind: ConfigMap
@@ -249,20 +141,23 @@ data:
     spec:
       template:
         spec:
-          priorityClassName: system-node-critical
           containers:
           - name: training-operator
             image: $(image)
             args:
             - "--zap-log-level=2"
+            - --pytorch-init-container-image
+            - $(image)
+            - "--webhook-secret-name"
+            - "kubeflow-training-operator-webhook-cert"
+            - "--webhook-service-name"
+            - "kubeflow-training-operator"
             - "--gang-scheduler-name=scheduler-plugins-scheduler"
-            resources:
-              requests:
-                cpu: 100m
-                memory: 100Mi
-              limits:
-                cpu: 500m
-                memory: 1000Mi
+          volumes:
+          - name: cert
+            secret:
+              defaultMode: 420
+              secretName: kubeflow-training-operator-webhook-cert
 ---
 apiVersion: operators.coreos.com/v1alpha1
 kind: Subscription
@@ -283,15 +178,9 @@ spec:
       - name: mlbatch-codeflare
         mountPath: /opt/manifests/codeflare/manager/manager.yaml
         subPath: manager.yaml
-      - name: mlbatch-kuberay
-        mountPath: /opt/manifests/ray/openshift/kuberay-operator-image-patch.yaml
-        subPath: kuberay-operator-image-patch.yaml
      - name: mlbatch-kueue
        mountPath: /opt/manifests/kueue/components/manager/controller_manager_config.yaml
        subPath: controller_manager_config.yaml
-      - name: mlbatch-kueue
-        mountPath: /opt/manifests/kueue/rhoai/manager_config_patch.yaml
-        subPath: manager_config_patch.yaml
      - name: mlbatch-training-operator
        mountPath: /opt/manifests/trainingoperator/rhoai/manager_config_patch.yaml
        subPath: manager_config_patch.yaml
@@ -299,9 +188,6 @@ spec:
      - name: mlbatch-codeflare
        configMap:
          name: mlbatch-codeflare
-      - name: mlbatch-kuberay
-        configMap:
-          name: mlbatch-kuberay
      - name: mlbatch-kueue
        configMap:
          name: mlbatch-kueue
diff --git a/setup.RHOAI-v2.19/mlbatch-upgrade-configmaps.yaml b/setup.RHOAI-v2.19/mlbatch-upgrade-configmaps.yaml
new file mode 100644
index 0000000..d85799d
--- /dev/null
+++ b/setup.RHOAI-v2.19/mlbatch-upgrade-configmaps.yaml
@@ -0,0 +1,145 @@
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: codeflare-operator-config
+  namespace: redhat-ods-applications
+data:
+  config.yaml: |
+    appwrapper:
+      enabled: true
+      Config:
+        autopilot:
+          injectAntiAffinities: true
+          monitorNodes: true
+          resourceTaints:
+            nvidia.com/gpu:
+            - key: autopilot.ibm.com/gpuhealth
+              value: ERR
+              effect: NoSchedule
+            - key: autopilot.ibm.com/gpuhealth
+              value: TESTING
+              effect: NoSchedule
+            - key: autopilot.ibm.com/gpuhealth
+              value: EVICT
+              effect: NoExecute
+        defaultQueueName: default-queue
+        enableKueueIntegrations: true
+        kueueJobReconciller:
+          manageJobsWithoutQueueName: true
+          waitForPodsReady:
+            blockAdmission: false
+            enable: false
+        schedulerName: scheduler-plugins-scheduler
+        slackQueueName: slack-cluster-queue
+        userRBACAdmissionCheck: false
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: mlbatch-kueue
+  namespace: redhat-ods-operator
+data:
+  controller_manager_config.yaml: |
+    apiVersion: config.kueue.x-k8s.io/v1beta1
+    kind: Configuration
+    health:
+      healthProbeBindAddress: :8081
+    metrics:
+      bindAddress: :8443
+      enableClusterQueueResources: true
+    webhook:
+      port: 9443
+    leaderElection:
+      leaderElect: true
+      resourceName: c1f6bfd2.kueue.x-k8s.io
+    controller:
+      groupKindConcurrency:
+        Job.batch: 5
+        Pod: 5
+        Workload.kueue.x-k8s.io: 5
+        LocalQueue.kueue.x-k8s.io: 1
+        Cohort.kueue.x-k8s.io: 1
+        ClusterQueue.kueue.x-k8s.io: 1
+        ResourceFlavor.kueue.x-k8s.io: 1
+    clientConnection:
+      qps: 50
+      burst: 100
+    #pprofBindAddress: :8082
+    waitForPodsReady:
+      enable: false
+      blockAdmission: false
+    manageJobsWithoutQueueName: true
+    #managedJobsNamespaceSelector:
+    #  matchLabels:
+    #    kueue-managed: "true"
+    #internalCertManagement:
+    #  enable: false
+    #  webhookServiceName: ""
+    #  webhookSecretName: ""
+    integrations:
+      frameworks:
+      # - "batch/job"
+      - "kubeflow.org/mpijob"
+      - "ray.io/rayjob"
+      - "ray.io/raycluster"
+      - "jobset.x-k8s.io/jobset"
+      - "kubeflow.org/mxjob"
+      - "kubeflow.org/paddlejob"
+      - "kubeflow.org/pytorchjob"
+      - "kubeflow.org/tfjob"
+      - "kubeflow.org/xgboostjob"
+      # - "pod"
+      # - "deployment" # requires enabling pod integration
+      # - "statefulset" # requires enabling pod integration
+      externalFrameworks:
+      - "AppWrapper.v1beta2.workload.codeflare.dev"
+      # podOptions:
+      #   namespaceSelector:
+      #     matchExpressions:
+      #       - key: kubernetes.io/metadata.name
+      #         operator: NotIn
+      #         values: [ kube-system, kueue-system ]
+    fairSharing:
+      enable: true
+      preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare]
+    #resources:
+    #  excludeResourcePrefixes: []
+    #  transformations:
+    #  - input: nvidia.com/mig-4g.5gb
+    #    strategy: Replace | Retain
+    #    outputs:
+    #      example.com/accelerator-memory: 5Gi
+    #      example.com/accelerator-gpc: 4
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: mlbatch-training-operator
+  namespace: redhat-ods-operator
+data:
+  manager_config_patch.yaml: |
+    apiVersion: apps/v1
+    kind: Deployment
+    metadata:
+      name: training-operator
+    spec:
+      template:
+        spec:
+          containers:
+          - name: training-operator
+            image: $(image)
+            args:
+            - "--zap-log-level=2"
+            - --pytorch-init-container-image
+            - $(image)
+            - "--webhook-secret-name"
+            - "kubeflow-training-operator-webhook-cert"
+            - "--webhook-service-name"
+            - "kubeflow-training-operator"
+            - "--gang-scheduler-name=scheduler-plugins-scheduler"
+          volumes:
+          - name: cert
+            secret:
+              defaultMode: 420
+              secretName: kubeflow-training-operator-webhook-cert
diff --git a/setup.RHOAI-v2.19/mlbatch-upgrade-fast-subscription.yaml b/setup.RHOAI-v2.19/mlbatch-upgrade-fast-subscription.yaml
new file mode 100644
index 0000000..6bf6ec5
--- /dev/null
+++ b/setup.RHOAI-v2.19/mlbatch-upgrade-fast-subscription.yaml
@@ -0,0 +1,34 @@
+apiVersion: operators.coreos.com/v1alpha1
+kind: Subscription
+metadata:
+  name: rhods-operator
+  namespace: redhat-ods-operator
+spec:
+  channel: fast
+  installPlanApproval: Manual
+  name: rhods-operator
+  source: redhat-operators
+  sourceNamespace: openshift-marketplace
+  config:
+    env:
+      - name: "DISABLE_DSC_CONFIG"
+    volumeMounts:
+      - name: mlbatch-codeflare
+        mountPath: /opt/manifests/codeflare/manager/manager.yaml
+        subPath: manager.yaml
+      - name: mlbatch-kueue
+        mountPath: /opt/manifests/kueue/components/manager/controller_manager_config.yaml
+        subPath: controller_manager_config.yaml
+      - name: mlbatch-training-operator
+        mountPath: /opt/manifests/trainingoperator/rhoai/manager_config_patch.yaml
+        subPath: manager_config_patch.yaml
+    volumes:
+      - name: mlbatch-codeflare
+        configMap:
+          name: mlbatch-codeflare
+      - name: mlbatch-kueue
+        configMap:
+          name: mlbatch-kueue
+      - name: mlbatch-training-operator
+        configMap:
+          name: mlbatch-training-operator
\ No newline at end of file
diff --git a/setup.RHOAI-v2.19/mlbatch-upgrade-stable-subscription.yaml b/setup.RHOAI-v2.19/mlbatch-upgrade-stable-subscription.yaml
new file mode 100644
index 0000000..31557aa
--- /dev/null
+++ b/setup.RHOAI-v2.19/mlbatch-upgrade-stable-subscription.yaml
@@ -0,0 +1,34 @@
+apiVersion: operators.coreos.com/v1alpha1
+kind: Subscription
+metadata:
+  name: rhods-operator
+  namespace: redhat-ods-operator
+spec:
+  channel: stable
+  installPlanApproval: Manual
+  name: rhods-operator
+  source: redhat-operators
+  sourceNamespace: openshift-marketplace
+  config:
+    env:
+      - name: "DISABLE_DSC_CONFIG"
+    volumeMounts:
+      - name: mlbatch-codeflare
+        mountPath: /opt/manifests/codeflare/manager/manager.yaml
+        subPath: manager.yaml
+      - name: mlbatch-kueue
+        mountPath: /opt/manifests/kueue/components/manager/controller_manager_config.yaml
+        subPath: controller_manager_config.yaml
+      - name: mlbatch-training-operator
+        mountPath: /opt/manifests/trainingoperator/rhoai/manager_config_patch.yaml
+        subPath: manager_config_patch.yaml
+    volumes:
+      - name: mlbatch-codeflare
+        configMap:
+          name: mlbatch-codeflare
+      - name: mlbatch-kueue
+        configMap:
+          name: mlbatch-kueue
+      - name: mlbatch-training-operator
+        configMap:
+          name: mlbatch-training-operator
\ No newline at end of file
diff --git a/setup.k8s/CLUSTER-SETUP.md b/setup.k8s/CLUSTER-SETUP.md
index 9ce72c1..865f024 100644
--- a/setup.k8s/CLUSTER-SETUP.md
+++ b/setup.k8s/CLUSTER-SETUP.md
@@ -98,7 +98,6 @@ operators as follows:
   - `manageJobsWithoutQueueName` is enabled and configured via `managedJobsNamespaceSelector`
     to be scoped to only namespaces that are labeled with `mlbatch-team-namespace=true`.
   - `waitForPodsReady` is disabled,
-  - `LendingLimit` feature gate is enabled,
   - `fairSharing` is enabled,
   - `enableClusterQueueResources` metrics is enabled,
 - AppWrapper operator:
diff --git a/setup.k8s/kueue/kustomization.yaml b/setup.k8s/kueue/kustomization.yaml
index 5b7004c..dca3860 100644
--- a/setup.k8s/kueue/kustomization.yaml
+++ b/setup.k8s/kueue/kustomization.yaml
@@ -44,10 +44,3 @@ patches:
       - get
       - list
       - watch
-- target:
-    kind: Deployment
-    name: controller-manager
-  patch: |
-    - op: add
-      path: /spec/template/spec/containers/0/args/-
-      value: "--feature-gates=LendingLimit=true"
diff --git a/setup.tmpl/CLUSTER-SETUP.md.tmpl b/setup.tmpl/CLUSTER-SETUP.md.tmpl
index 8af0cd4..a9cd2b8 100644
--- a/setup.tmpl/CLUSTER-SETUP.md.tmpl
+++ b/setup.tmpl/CLUSTER-SETUP.md.tmpl
@@ -105,7 +105,9 @@ AI configuration as follows:
   - `manageJobsWithoutQueueName` is enabled,
   - `batch/job` integration is disabled,
   - `waitForPodsReady` is disabled,
+{{- if (eq .VERSION "RHOAI-v2.16") }}
   - `LendingLimit` feature gate is enabled,
+{{- end }}
 {{- if .FAIRSHARE }}
   - `fairSharing` is enabled,
 {{- end }}
@@ -116,24 +118,7 @@ AI configuration as follows:
   - `schedulerName` is set to `scheduler-plugins-scheduler`,
   - `queueName` is set to `default-queue`,
   - `slackQueueName` is set to `slack-cluster-queue`
-- pod priorities, resource requests and limits have been adjusted.
-
-{{ if (eq .VERSION "RHOAI-v2.13") -}}
-To work around https://issues.redhat.com/browse/RHOAIENG-7887 (a race condition
-in Red Hat OpenShift AI installation), do a rolling restart of the Kueue manager.
-```sh
-{{ .KUBECTL }} rollout restart deployment/kueue-controller-manager -n redhat-ods-applications
-```
-After doing the restart, verify that you see the following lines in the
-kueue-controller-manager's log:
-```sh
-{"level":"info","ts":"2024-06-25T20:17:25.689638786Z","logger":"controller-runtime.builder","caller":"builder/webhook.go:189","msg":"Registering a validating webhook","GVK":"kubeflow.org/v1, Kind=PyTorchJob","path":"/validate-kubeflow-org-v1-pytorchjob"}
-{"level":"info","ts":"2024-06-25T20:17:25.689698615Z","logger":"controller-runtime.webhook","caller":"webhook/server.go:183","msg":"Registering webhook","path":"/validate-kubeflow-org-v1-pytorchjob"}
-{"level":"info","ts":"2024-06-25T20:17:25.689743757Z","logger":"setup","caller":"jobframework/setup.go:81","msg":"Set up controller and webhook for job framework","jobFrameworkName":"kubeflow.org/pytorchjob"}
-
-```
-{{- end }}
 
 {{- else -}}
 
 ## Install Operators
@@ -182,7 +167,6 @@ operators as follows:
   - `manageJobsWithoutQueueName` is enabled and configured via `managedJobsNamespaceSelector`
     to be scoped to only namespaces that are labeled with `mlbatch-team-namespace=true`.
   - `waitForPodsReady` is disabled,
-  - `LendingLimit` feature gate is enabled,
 {{- if .FAIRSHARE }}
   - `fairSharing` is enabled,
 {{- end }}
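
After approving the install plan on an upgraded cluster, a quick sanity check can confirm that the changes in this patch series landed. The following is a minimal sketch, not part of the patch itself; it assumes the `oc` CLI and the namespaces used above, and it assumes the upgraded Kueue recreates the MultiKueue CRDs at v1beta1:

```sh
# The operator CSV should now report 2.19.0.
oc get csv -n redhat-ods-operator | grep rhods-operator

# The recreated MultiKueue CRDs should serve v1beta1 only
# (the v1alpha1 versions were deleted by hand before the upgrade).
oc get crd multikueueclusters.kueue.x-k8s.io -o jsonpath='{.spec.versions[*].name}{"\n"}'

# The patched Kueue configuration should be in place; metrics now bind to :8443.
oc get configmap mlbatch-kueue -n redhat-ods-operator -o yaml | grep bindAddress
```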