diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
deleted file mode 100644
index 517eac1..0000000
--- a/.github/workflows/CI.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-name: CI
-on:
- push:
- branches: [main]
- pull_request:
- branches: [main]
-
-jobs:
- CI:
- runs-on: ubuntu-latest
- steps:
- - name: checkout code
- uses: actions/checkout@v4
- with:
- submodules: false
-
- - name: Set latest tag and branch name
- run: |
- echo "GIT_BRANCH=gha-ci" >> $GITHUB_ENV
- echo "TAG=$GITHUB_RUN_ID" >> $GITHUB_ENV
-
- - name: Set up Python
- uses: actions/setup-python@v5
- with:
- python-version: '3.11'
-
- - name: Install Helm
- uses: azure/setup-helm@v4
-
- - name: Install Helm unittest plugin
- run: |
- helm plugin install https://github.com/helm-unittest/helm-unittest.git
-
- - name: Run pre-commit checks
- run: |
- pip install pre-commit
- pre-commit run --show-diff-on-failure --color=always --all-files
-
diff --git a/.github/workflows/gh-pages-static.yml b/.github/workflows/gh-pages-static.yml
index 18381fb..b9c9160 100644
--- a/.github/workflows/gh-pages-static.yml
+++ b/.github/workflows/gh-pages-static.yml
@@ -1,8 +1,13 @@
-# Workflow to invoke from another workflow to deploy static content to GitHub Pages
+# Simple workflow for deploying static content to GitHub Pages
name: Deploy static content to Pages
on:
- workflow_call:
+ # Runs on pushes targeting the default branch
+ push:
+ branches: ["gh-pages"]
+
+ # Allows you to run this workflow manually from the Actions tab
+ workflow_dispatch:
# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
permissions:
@@ -17,6 +22,7 @@ concurrency:
cancel-in-progress: false
jobs:
+ # Single deploy job since we're just deploying
deploy:
environment:
name: github-pages
@@ -25,8 +31,6 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4
- with:
- ref: gh-pages
- name: Setup Pages
uses: actions/configure-pages@v5
- name: Upload artifact
diff --git a/.github/workflows/release-chart.yaml b/.github/workflows/release-chart.yaml
deleted file mode 100644
index a14c938..0000000
--- a/.github/workflows/release-chart.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-name: Release Charts
-
-on:
- # This workflow is meant to be triggered manually from the Actions tab
- workflow_dispatch:
-
-jobs:
- release:
- permissions:
- contents: write
- runs-on: ubuntu-latest
- steps:
- - name: Checkout
- uses: actions/checkout@v4
- with:
- fetch-depth: 0
-
- - name: Configure Git
- run: |
- git config user.name "$GITHUB_ACTOR"
- git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
-
- - name: Install Helm
- uses: azure/setup-helm@v4
-
- - name: Run chart-releaser for pytorchjob-generate
- uses: helm/chart-releaser-action@v1.6.0
- with:
- charts_dir: tools/pytorchjob-generator
- packages_with_index: true
- skip_existing: true
- env:
- CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
-
- - name: Run chart-releaser for sakkara-deploy
- uses: helm/chart-releaser-action@v1.6.0
- with:
- charts_dir: tools/sakkara-deploy
- packages_with_index: true
- skip_existing: true
- env:
- CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
-
- publish:
- needs: release
- uses: project-codeflare/mlbatch/.github/workflows/gh-pages-static.yml@main
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index c96f019..0000000
--- a/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-.vscode
-node_modules/
\ No newline at end of file
diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index 523fb29..0000000
--- a/.gitmodules
+++ /dev/null
@@ -1,4 +0,0 @@
-[submodule "scheduler-plugins"]
- path = scheduler-plugins
- url = https://github.com/kubernetes-sigs/scheduler-plugins.git
- branch = release-1.28
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 01a9314..4e6a92c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,22 +1 @@
-repos:
-- repo: https://github.com/norwoodj/helm-docs
- rev: "v1.13.1"
- hooks:
- - id: helm-docs-built
- args:
- - --chart-search-root=tools/pytorchjob-generator
- - --sort-values-order=file
-- repo: local
- hooks:
- - id: helm-unittests
- name: run helm unittests
- language: system
- entry: helm unittest
- pass_filenames: false
- always_run: true
- args:
- - tools/pytorchjob-generator/chart
-- repo: https://github.com/standard/standard
- rev: "v17.1.2"
- hooks:
- - id: standard
+repos: []
diff --git a/CODEFLARE.md b/CODEFLARE.md
deleted file mode 100644
index cd0baba..0000000
--- a/CODEFLARE.md
+++ /dev/null
@@ -1,365 +0,0 @@
-# MLBatch for CodeFlare Users
-
-MLBatch is an evolution of the [CodeFlare](https://github.com/project-codeflare)
-stack for managing AI/ML workloads on Kubernetes and its workload dispatcher
-[MCAD](https://github.com/project-codeflare/multi-cluster-app-dispatcher).
-
-Like MCAD, MLBatch is designed to queue workloads and admit them for execution over time,
-accounting for quotas, priorities, and precedence. MLBatch relies on
-[AppWrappers](https://github.com/project-codeflare/appwrapper) to bundle
-together all the components of a workload such as pods, PyTorch jobs, Ray jobs,
-config maps, secrets, etc. AppWrappers in MLBatch offer improved mechanisms to
-automatically detect and retry failed workloads. MLBatch includes a
-backward-compatible [pytorch-generator](tools/pytorchjob-generator/) Helm
-template to facilitate the specification of PyTorch jobs.
-
-In this document, we review the key innovations introduced by MLBatch and
-differences with the earlier setup built around MCAD.
-
-## Kueue
-
-MLBatch replaces MCAD with [Kueue](https://kueue.sigs.k8s.io) to queue and
-admit jobs. Kueue introduces a new quota management system based on [cluster
-queues](https://kueue.sigs.k8s.io/docs/concepts/cluster_queue/). This quota
-system provides more flexibility to allocate compute resources (CPU, memory, and
-GPU quotas) than [resource
-quotas](https://kubernetes.io/docs/concepts/policy/resource-quotas/) in core
-Kubernetes. This system allows the borrowing of unused quota between
-cluster queues (see [Priorities and Preemption below](#priorities-and-preemption)).
-Borrowing enables high overall cluster resource utilization while
-still ensuring that every team always has the ability to run jobs up to their
-allocated quotas. Kueue also enables teams to use
-priorities to order jobs within their own cluster queue without those
-priorities impacting the scheduling of jobs by other cluster queues.
-
-Unlike MCAD, Kueue only considers quotas when admitting workloads. As a result,
-MLBatch must ensure that all resource-consuming workloads in user namespaces are managed
-by Kueue. This is accomplished by strictly [limiting the Kinds](#allowed-kinds)
-of non-AppWrapper resources users are permitted to create.
-
-For various reasons, workloads are not directly submitted to cluster queues but
-rather to namespaced [local
-queues](https://kueue.sigs.k8s.io/docs/concepts/local_queue/) that feed into the
-cluster queues. By convention in MLBatch, each team is assigned a namespace and
-a cluster queue dedicated to the team. For example, the _platform_ team is
-assigned to namespace `platform` and its associated cluster queue is named
-`platform-cluster-queue`. The local queue name in each namespace in MLBatch is always `default-queue`.
-Hence, the `default-queue` in namespace `platform` feeds into the
-`platform-cluster-queue`. In short, all workloads must be submitted to the local
-queue named `default-queue` but to review quota allocation and usage, one has to
-query the cluster queues.
-
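-As a sketch of this convention (using the names from the example above; the
-actual manifests on a given cluster may differ), the `default-queue` local
-queue in namespace `platform` simply points at the team's cluster queue:
-```yaml
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: LocalQueue
-metadata:
-  name: default-queue # always default-queue by MLBatch convention
-  namespace: platform # the team's namespace
-spec:
-  clusterQueue: platform-cluster-queue # the team's dedicated cluster queue
-```
-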
-MLBatch offers a simple [cluster-checker](tools/cluster-checker/) tool to get a
-bird’s-eye view of quotas on a cluster from a GPU perspective:
-```sh
-node checker.js
-```
-```
-CLUSTER QUEUE GPU QUOTA GPU USAGE ADMITTED WORKLOADS PENDING WORKLOADS
-code-cluster-queue 8 16 1 0
-platform-cluster-queue 8 4 4 0
-
-Total GPU count in cluster: 24
-Unschedulable GPU count: - 0
-Schedulable GPU count: = 24
-
-Nominal GPU quota: 16
-Slack GPU quota: + 8
-Total GPU quota: = 24
-
-GPU usage by admitted workloads: 20
-Borrowed GPU count: 8
-```
-The tool lists the cluster queues defined on the cluster showing the GPU
-quota for each one as well as the number of GPUs in use by admitted workloads.
-The GPU usage may exceed the GPU quota for the cluster queue if this cluster queue
-is borrowing idle capacity.
-
-The tool also reports the total GPU capacity distinguishing healthy (i.e.,
-schedulable, available for use) and unhealthy (i.e., unschedulable, unavailable)
-GPUs. The nominal GPU quota represents the cumulative GPU quota across all the
-teams. MLBatch recommends that cluster admins keep the nominal quota below the
-cluster capacity to avoid oversubscribing the GPUs. Typically, a small number of
-GPUs is not allocated to any team but retained as a slack quota that any team
-may borrow from. MLBatch automatically adjusts the slack quota to ensure the
-schedulable GPU count and nominal quota remain equal, unless of course this
-slack becomes virtually negative, in which case a cluster admin should decide
-how to reduce the nominal quota.
-
-For more details about the cluster queues run:
-```sh
-kubectl describe clusterqueues
-```
-
-## AppWrappers
-
-MLBatch recommends submitting every workload as an
-[AppWrapper](https://github.com/project-codeflare/appwrapper). AppWrappers offer
-a number of checks, guarantees, and benefits over submitting unwrapped
-[PyTorchJobs](https://www.kubeflow.org/docs/components/training/user-guides/pytorch/)
-for example. In particular, the AppWrapper controller automatically injects:
-- labels holding the name and id of the user submitting the AppWrapper,
-- the `queueName` label required to queue the workload in the `default-queue`,
- and
-- the `schedulerName` specification required to enable gang scheduling and
- packing on the GPU dimension to mitigate node fragmentation.
-
-Moreover, the AppWrapper controller consistently handles cleanup and retries
-across all types of workloads:
-- The resources, especially the GPUs, utilized by a failed workload are returned
- to the cluster in a timely manner, i.e., within minutes by default, with a
- configurable grace period to permit post-mortem debugging. Cluster admins can
- enforce an upper bound on this grace period to bound resource wastage.
-- The Kubernetes objects associated with a completed workload, in particular the
- pods and their logs, are eventually disposed of, by default after a week.
-- Failed workloads are automatically retried up to a configurable number of
- attempts.
-
-The AppWrapper specification has been greatly simplified for MLBatch. In most
-cases, an AppWrapper yaml adds a simple prefix to a workload yaml, for instance
-for a pod:
-```yaml
-# appwrapper prefix
-apiVersion: workload.codeflare.dev/v1beta2
-kind: AppWrapper
-metadata:
- name: wrapped-pod
-spec:
- components:
- - template:
- # indented pod specification
- apiVersion: v1
- kind: Pod
- metadata:
- name: sample-pod
- spec:
- restartPolicy: Never
- containers:
- - name: busybox
- image: quay.io/project-codeflare/busybox:1.36
- command: ["sh", "-c", "sleep 5"]
- resources:
- requests:
- cpu: 1
-```
-To submit this workload to the cluster, save this yaml to `wrapped-pod.yaml` and
-run:
-```sh
-kubectl apply -f wrapped-pod.yaml
-```
-
-MLBatch includes an [appwrapper-packager](tools/appwrapper-packager/) tool to
-automate the addition of this prefix as well as the indentation of the workload
-specification. In addition, MLBatch includes a new implementation of the
-[pytorch-generator](tools/pytorchjob-generator/) tool to facilitate the
-configuration of PyTorch jobs including the addition of the AppWrapper prefix.
-
-As a result of the AppWrapper simplification for MLBatch, AppWrappers, which are
-now in version `v1beta2`, are not backward compatible with MCAD's `v1beta1`
-AppWrappers. The companion pytorch-generator tool for MCAD is not compatible
-with MLBatch. However, the pytorch-generator tool included in MLBatch is
-backward compatible with the input format of the legacy tool. In other words,
-simply rerun `helm template` on the input `values.yaml` files to generate proper
-`v1beta2` AppWrappers. Please note that existing fault-tolerance-related
-settings from these input files will be ignored and defaults will be used
-instead. Please refer to the tool [documentation](tools/pytorchjob-generator/)
-for how to override settings such as max retry counts.
-
-The list of all AppWrappers in a namespace is obtained by running:
-```sh
-kubectl get appwrappers
-```
-```
-NAME STATUS QUOTA RESERVED RESOURCES DEPLOYED UNHEALTHY
-wrapped-pod Succeeded False True False
-```
-The status of an AppWrapper is one of:
-- Suspended: the AppWrapper is queued,
-- Resuming: the AppWrapper is transitioning to Running,
-- Running: the AppWrapper is running,
-- Succeeded: the execution completed successfully,
-- Failed: the execution failed and will not be retried,
-- Resetting: a failure has been detected during the current execution and the
- AppWrapper is preparing to retry,
-- Suspending: the AppWrapper has been evicted by Kueue and is transitioning back
- to Suspended.
-
-```mermaid
----
-title: AppWrapper Lifecycle
----
-stateDiagram-v2
- f : Failed
- sp : Suspended
- ad : Admitted
- s : Succeeded
- su: Suspending
-
- state ad {
- [*] --> rs
- rs --> rn
- rn --> rt
- rt --> rs
-
- rs : Resuming
- rn : Running
- rt : Resetting
- }
-
- [*] --> sp
- sp --> ad
- rn --> s
- ad --> su
- su --> sp
- ad --> f
-
- classDef admitted fill:lightblue
- class rs admitted
- class rn admitted
- class rt admitted
-
- classDef failed fill:pink
- class f failed
-
- classDef succeeded fill:lightgreen
- class s succeeded
-```
-In this diagram, the outer loop consisting of the `Suspended`, `Admitted`, and
-`Suspending` states is managed by Kueue, while the inner loop consisting of the
-`Resuming`, `Running`, and `Resetting` states is managed by the AppWrapper
-controller. In particular, the AppWrapper controller handles workload retries
-without releasing and reacquiring Kueue quotas, hence without moving retried
-workloads to the back of the cluster queue.
-
-In addition, this AppWrapper table reports:
-- quota reserved: whether Kueue has reserved the quota requested by the
- AppWrapper,
-- resource deployed: whether the resources wrapped by the AppWrapper, such as
-  the `sample-pod` in this example, have been created on the cluster,
-- unhealthy: whether a failure has been detected during the current execution of
- the AppWrapper.
-
-For example, a `Running` AppWrapper has both quota reserved and resource
-deployed. A `Succeeded` AppWrapper will no longer reserve quota but the wrapped
-resources such as terminated pods will be preserved on the cluster for a period
-of time as discussed above to permit log collection. A `Failed` AppWrapper will
-transiently continue to reserve quota until the wrapped resources have been
-undeployed, so as to avoid oversubscribing GPUs during the cleanup of failed
-jobs.
-
-More details about an AppWrapper condition may be obtained by describing the
-AppWrapper:
-```sh
-kubectl describe appwrapper wrapped-pod
-```
-Kueue creates and maintains a companion `Workload` object for each workload it
-manages. Further details about the AppWrapper condition such as Kueue's
-rationale for evicting the workload may be obtained by accessing this companion
-object:
-```sh
-kubectl get workloads
-```
-```
-NAME QUEUE RESERVED IN ADMITTED AGE
-appwrapper-wrapped-pod-81d3e default-queue team1-cluster-queue True 161m
-```
-```sh
-kubectl describe workload appwrapper-wrapped-pod-81d3e
-```
-Workload objects are automatically deleted by Kueue when the workload itself,
-i.e., the AppWrapper is deleted.
-
-## Priorities and Preemption
-
-MLBatch supports the `high-priority`, `default-priority`, and `low-priority`
-priority classes.
-
-If you are using the pytorch-generator tool, you can override the default
-`default-priority` of a workload by setting the `priority` variable. If you
-are generating your yaml by other means, simply add a `priorityClassName`
-to the specification of the wrapped pod templates, for example:
-```yaml
-# appwrapper prefix
-apiVersion: workload.codeflare.dev/v1beta2
-kind: AppWrapper
-metadata:
- name: wrapped-pod
-spec:
- components:
- - template:
- # indented pod specification
- apiVersion: v1
- kind: Pod
- metadata:
- name: sample-pod
- spec:
- priorityClassName: high-priority # workload priority
- restartPolicy: Never
- containers:
- - name: busybox
- image: quay.io/project-codeflare/busybox:1.36
- command: ["sh", "-c", "sleep 5"]
- resources:
- requests:
- cpu: 1
-```
-
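-If you are using the pytorch-generator tool instead, the override is a single
-entry in your values file. A minimal sketch (see the tool
-[documentation](tools/pytorchjob-generator/) for the authoritative variable
-names and accepted values):
-```yaml
-# values file excerpt for the pytorchjob-generator chart
-priority: high-priority # one of low-priority, default-priority, high-priority
-```
-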
-Workloads of equal priority are considered for admission by their cluster queue in submission order.
-Higher-priority workloads are considered for admission before lower-priority
-workloads irrespective of their submission time. However, workloads that cannot be
-admitted will not block the admission of newer and/or lower-priority workloads
-(if they fit within the nominal quota of the cluster queue).
-
-To reduce workload churn, Kueue forbids workloads to
-simultaneously utilize both preemption and borrowing to acquire the
-necessary quota to be admitted. Therefore a workload that by itself
-exceeds the nominal quota of its cluster queue will never trigger
-preemption. Similarly, if the combined resources of (a) a pending
-workload and (b) the sum of all already admitted workloads with equal
-or higher priority to the pending workload exceeds the nominal quota
-of their cluster queue, Kueue will not preempt already admitted lower
-priority workloads of that cluster queue to admit the pending
-workload.
-
-When a workload is pending on a cluster queue and admitting that
-workload would still leave the cluster queue at or below its nominal
-quota, Kueue may preempt one or more currently admitted workloads of
-other cluster queues to reclaim the necessary borrowed quota. When such
-preemption is necessary, the decision of which workload(s) to preempt
-is based solely on considering the currently admitted workloads of
-just those cluster queues that are exceeding their nominal
-quota. Workloads admitted by cluster queues that are currently at or
-below their nominal quota will not be preempted.
-
-## Allowed Kinds
-
-MLBatch allows users to directly create the following Kinds of compute
-resources:
- + AppWrapper
-  + PyTorchJob (allowed, but recommended to be wrapped in an AppWrapper)
-  + RayJob (allowed, but recommended to be wrapped in an AppWrapper)
-  + RayCluster (allowed, but recommended to be wrapped in an AppWrapper)
-
-MLBatch also allows users to directly create the following Kinds of
-non-compute resources:
- + Service
- + Secret
- + ConfigMap
- + PersistentVolumeClaim
-  + PodGroup (allowed, but recommended to be wrapped in an AppWrapper)
-
-MLBatch allows users to wrap any number of resources of the
-following Kinds inside of an AppWrapper:
- + PyTorchJob
- + RayJob
- + RayCluster
- + Deployment
- + StatefulSet
- + Pod
- + Job
- + ServiceAccount
- + Service
- + Secret
- + ConfigMap
- + PersistentVolumeClaim
- + PodGroup
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 261eeb9..0000000
--- a/LICENSE
+++ /dev/null
@@ -1,201 +0,0 @@
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright [yyyy] [name of copyright owner]
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
diff --git a/QUOTA_MAINTENANCE.md b/QUOTA_MAINTENANCE.md
deleted file mode 100644
index 5b475d9..0000000
--- a/QUOTA_MAINTENANCE.md
+++ /dev/null
@@ -1,62 +0,0 @@
-# Quota Maintenance
-
-A *team* in MLBatch is a group of users that share a resource quota.
-
-In Kueue, the `ClusterQueue` is the abstraction used to define a pool
-of resources (`cpu`, `memory`, `nvidia.com/gpu`, etc.) that is
-available to a team. A `LocalQueue` is the abstraction used by
-members of the team to submit workloads to a `ClusterQueue` for
-execution using those resources.
-
-Kubernetes built-in `ResourceQuotas` should not be used for resources that
-are being managed by `ClusterQueues`. The two quota systems are incompatible.
-
-We strongly recommend maintaining a simple relationship between
-teams, namespaces, `ClusterQueues`, and `LocalQueues`. Each team
-should be assigned to their own namespace that contains a single
-`LocalQueue` which is configured to be the only `LocalQueue` that
-targets the team's `ClusterQueue`.
-
-The quotas assigned to a `ClusterQueue` can be dynamically adjusted by
-a cluster admin at any time. Adjustments to quotas only impact queued
-workloads; workloads already admitted for execution are not impacted
-by quota adjustments.
-
-For Kueue quotas to be effective, the sum of all quotas for each managed
-resource (`cpu`, `memory`, `nvidia.com/gpu`, `pods`) must be kept less than
-or equal to the available cluster capacity for this resource.
-Concretely, for a cluster with 256 NVIDIA GPUs dedicated to MLBatch users, the
-cumulative `nominalQuota` for the `nvidia.com/gpu` resource should be 256 or
-less. Quotas should be reduced when the available capacity is reduced whether
-because of failures or due to the allocation of resources to non-batch
-workloads.
-
-To facilitate the necessary quota adjustments, we recommend setting up
-a dedicated `ClusterQueue` for slack capacity that other `ClusterQueues`
-can borrow from. This queue should not be associated with any team,
-project, namespace, or local queue. Its `lendingLimit` should be adjusted
-dynamically to reflect changes in cluster capacity. If sized
-appropriately, this queue will make adjustments to other cluster
-queues unnecessary for small cluster capacity changes. The figure
-below shows this recommended setup for an MLBatch cluster with three
-teams. Beginning with RHOAI 2.12 (AppWrapper v0.23), the dynamic
-adjustment of the Slack `ClusterQueue` `lendingLimit` can be
-configured to be fully automated.
-
-
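-As an illustration only (the flavor and cohort names below are placeholders,
-not values prescribed by MLBatch), a slack `ClusterQueue` of this kind might
-look like:
-```yaml
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: ClusterQueue
-metadata:
-  name: slack-cluster-queue
-spec:
-  cohort: default-cohort # placeholder; must match the cohort of the team queues
-  resourceGroups:
-  - coveredResources: ["nvidia.com/gpu"] # other managed resources handled similarly
-    flavors:
-    - name: default-flavor # placeholder ResourceFlavor name
-      resources:
-      - name: nvidia.com/gpu
-        nominalQuota: 8 # slack capacity that team queues may borrow
-        lendingLimit: 8 # adjust downward when cluster capacity shrinks
-```
-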
-Every resource name occurring in the resource requests or limits of a workload
-must be covered by a `ClusterQueue` intended to admit the workload, even if the
-requested resource count is zero. For example, a `ClusterQueue` must cover
-`nvidia.com/roce_gdr`, possibly with an empty quota, to admit a `PyTorchJob`
-requesting:
-```yaml
- resources:
- requests:
- cpu: 1
- memory: 256Mi
- nvidia.com/roce_gdr: 0
- limits:
- cpu: 1
- memory: 256Mi
- nvidia.com/roce_gdr: 0
-```
diff --git a/README.md b/README.md
index bad8ac0..379026f 100644
--- a/README.md
+++ b/README.md
@@ -1,75 +1,3 @@
-# MLBatch
+This project's GitHub pages are only used as a Helm repository.
-This repository describes the [setup](SETUP.md) and [use](USAGE.md) of the
-MLBatch queuing and quota management system on OpenShift and Kubernetes clusters. MLBatch
-leverages [Kueue](https://kueue.sigs.k8s.io), the [Kubeflow Training
-Operator](https://www.kubeflow.org/docs/components/training/),
-[KubeRay](https://docs.ray.io/en/latest/cluster/kubernetes/index.html), and the
-[Codeflare Operator](https://github.com/project-codeflare/codeflare-operator)
-from [Red Hat OpenShift
-AI](https://www.redhat.com/en/technologies/cloud-computing/openshift/openshift-ai).
-MLBatch enables [AppWrappers](https://project-codeflare.github.io/appwrapper/)
-and adds
-[Coscheduler](https://github.com/kubernetes-sigs/scheduler-plugins/blob/master/pkg/coscheduling/README.md).
-MLBatch includes a number of configuration steps to help these components work
-in harmony and support large workloads on large clusters.
-
-MLBatch handles the queuing and dispatching of batch workloads on OpenShift and Kubernetes
-clusters. It enforces team quotas at the namespace level. It automates the
-borrowing and reclamation of unused quotas across teams. Teams can use
-priorities within their namespaces without impact on other teams. Using
-AppWrappers to submit workloads activates a number of fault detection and
-recovery capabilities, including automatically detecting failed pods and
-automatically retrying failed workloads. Coscheduler supports gang scheduling
-and minimizes fragmentation by preferentially packing jobs requiring less than a
-full node's worth of GPUs together.
-
-## Cluster Setup
-
-To learn how to setup MLBatch on a cluster and onboard teams see
-[SETUP.md](SETUP.md).
-
-*Quota maintenance* is a key aspect of smoothly administering an MLBatch cluster.
-Cluster admins should carefully read [QUOTA_MAINTENANCE.md](QUOTA_MAINTENANCE.md).
-
-## Running Workloads
-
-To learn how to run workloads on an MLBatch cluster see [USAGE.md](USAGE.md) or
-[CODEFLARE.md](CODEFLARE.md) if you are already familiar with the
-[CodeFlare](https://github.com/project-codeflare) stack for managing AI/ML
-workloads on Kubernetes.
-
-### PyTorchJobs via the MLBatch Helm Chart
-
-Properly configuring a distributed `PyTorchJob` to make effective use of the
-MLBatch system and hardware accelerators (GPUs, RoCE GDR) can be tedious. To
-automate this process, we provide a Helm chart that captures best practices and
-common configuration options. Using this Helm chart helps eliminate common
-mistakes. Please see [pytorchjob-generator](tools/pytorchjob-generator) for
-detailed usage instructions.
-
-## Development Setup
-
-If you will be contributing to the development of the MLBatch project, you must
-set up pre-commit hooks for your local clone of the repository. Do the following
-once, immediately after cloning this repo:
-```shell
-helm plugin install https://github.com/helm-unittest/helm-unittest.git
-pre-commit install
-```
-
-## License
-
-Copyright 2024 IBM Corporation.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
+Please see the [GitHub project](https://github.com/project-codeflare/mlbatch) for all user-facing content.
diff --git a/SETUP.md b/SETUP.md
deleted file mode 100644
index 1b756e9..0000000
--- a/SETUP.md
+++ /dev/null
@@ -1,60 +0,0 @@
-# MLBatch Setup
-
-The MLBatch setup consists of a *cluster setup* to be done once
-and a *team setup* to be repeated for each team that will
-be using the cluster.
-
-Batch users should only be permitted to create AppWrappers or other
-workload Kinds that are natively supported by Kueue. The cluster setup
-defines an `mlbatch-edit` role which enforces these restrictions and
-will be used in the setup process for each team of MLBatch users that
-is onboarded.
-
-This setup has been developed on Red Hat OpenShift 4.14, Red Hat OpenShift 4.16,
-and Kubernetes 1.29 and is intended to support Red Hat OpenShift 4.14 and up
-and/or Kubernetes 1.29 and up.
-
-To start with, recursively clone and enter this repository:
-```sh
-git clone --recursive https://github.com/project-codeflare/mlbatch.git
-cd mlbatch
-```
-
-Detailed instructions and configuration files can be found in subfolders,
-one for each base platform.
-
-## Red Hat OpenShift AI
-
-We recommend using the most recent ***stable*** release of
-Red Hat OpenShift AI as the base platform for MLBatch. Please see
-[Red Hat OpenShift AI Self-Managed Life Cycle](https://access.redhat.com/support/policy/updates/rhoai-sm/lifecycle)
-for the life cycle dates of currently supported ***stable*** and ***fast*** releases.
-
-Instructions are provided for the following Red Hat OpenShift AI ***stable*** releases:
-+ Red Hat OpenShift AI 2.19
- + [RHOAI 2.19 Cluster Setup](./setup.RHOAI-v2.19/CLUSTER-SETUP.md)
- + [RHOAI 2.19 Team Setup](./setup.RHOAI-v2.19/TEAM-SETUP.md)
- + [UPGRADING from RHOAI 2.16](./setup.RHOAI-v2.19/UPGRADE-STABLE.md)
- + [UPGRADING from RHOAI 2.18](./setup.RHOAI-v2.19/UPGRADE-FAST.md)
- + [RHOAI 2.19 Uninstall](./setup.RHOAI-v2.19/UNINSTALL.md)
-+ Red Hat OpenShift AI 2.16
- + [RHOAI 2.16 Cluster Setup](./setup.RHOAI-v2.16/CLUSTER-SETUP.md)
- + [RHOAI 2.16 Team Setup](./setup.RHOAI-v2.16/TEAM-SETUP.md)
- + [UPGRADING from RHOAI 2.13](./setup.RHOAI-v2.16/UPGRADE-STABLE.md)
- + [UPGRADING from RHOAI 2.15](./setup.RHOAI-v2.16/UPGRADE-FAST.md)
- + [RHOAI 2.16 Uninstall](./setup.RHOAI-v2.16/UNINSTALL.md)
-
-Instructions are provided for the following Red Hat OpenShift AI ***fast*** releases:
-+ Red Hat OpenShift AI 2.19
- + [RHOAI 2.19 Cluster Setup](./setup.RHOAI-v2.19/CLUSTER-SETUP.md)
- + [RHOAI 2.19 Team Setup](./setup.RHOAI-v2.19/TEAM-SETUP.md)
- + [UPGRADING from RHOAI 2.18](./setup.RHOAI-v2.19/UPGRADE.md)
- + [RHOAI 2.19 Uninstall](./setup.RHOAI-v2.19/UNINSTALL.md)
-
-## Kubernetes
-
-MLBatch can be installed on any Kubernetes cluster version 1.29 or later
-by following these instructions:
- + [Kubernetes Cluster Setup](./setup.k8s/CLUSTER-SETUP.md)
- + [Kubernetes Team Setup](./setup.k8s/TEAM-SETUP.md)
- + [Kubernetes Uninstall](./setup.k8s/UNINSTALL.md)
diff --git a/USAGE.md b/USAGE.md
deleted file mode 100644
index 604697e..0000000
--- a/USAGE.md
+++ /dev/null
@@ -1,393 +0,0 @@
-# MLBatch Quick Start
-
-MLBatch supports `PyTorchJobs`, `RayJobs`, `RayClusters`, as well as
-`AppWrappers`, which can wrap and bundle together resources such as `Pods`,
-`Jobs`, `Deployments`, `StatefulSets`, `ConfigMaps`, or `Secrets`.
-
-This document first explains [queues](#queues) then discusses a few [examples
-workloads](#example-workloads), [monitoring](#monitoring-workloads-and-queues),
-[borrowing](#borrowing-and-reclamation),
-[priorities](#priorities-and-preemption), and
-[fault-tolerance](#fault-tolerance).
-
-It is not required to clone this repository to use an MLBatch system. However,
-if you want local copies of the examples to enable you to easily try them, you
-can recursively clone and enter this repository:
-```sh
-git clone --recursive https://github.com/project-codeflare/mlbatch.git
-cd mlbatch
-```
-
-## PyTorchJobs via the MLBatch Helm Chart
-
-Properly configuring a distributed `PyTorchJob` to make effective use of the
-MLBatch system and hardware accelerators (GPUs, RoCE GDR) can be tedious. To
-automate this process, we provide a Helm chart that captures best practices and
-common configuration options. Using this Helm chart helps eliminate common
-mistakes. Please see [pytorchjob-generator](tools/pytorchjob-generator) for
-detailed usage instructions.
-
-## Generating AppWrappers from Kubernetes YAML files
-
-If you have a Kubernetes YAML file containing one or more
-non-AppWrapper resources (e.g., Deployments, Pods, Services, etc.),
-you can use the [appwrapper-packager](tools/appwrapper-packager) tool
-to generate an AppWrapper yaml containing those resources.
-
-## Queues
-
-All workloads must target a local queue in their namespace. The local queue name
-is specified as a label as follows:
-```yaml
-apiVersion: ???
-kind: ???
-metadata:
- name: ???
- labels:
- kueue.x-k8s.io/queue-name: default-queue # queue name
-```
-In MLBatch, the default local queue name is `default-queue`.
-
-Workloads submitted as `AppWrappers` do not need to explicitly specify the local
-queue name as it will be automatically added if missing. However, other workload
-types (`PyTorchJobs`, `RayJobs`, `RayClusters`) must specify the local queue
-name as demonstrated above.
-
-Workloads missing a local queue name will not be admitted. If you forget to
-label the workload, you must either delete and resubmit it or use `oc edit` to
-add the missing label to the metadata section of your workload object.
-
-Submitted workloads are queued and dispatched when enough quota is available,
-which eventually results in the creation of pods that are submitted to the
-cluster's scheduler. By default, this scheduler will schedule pods one at a
-time and spread pods across nodes to even the load across the cluster. Both
-behaviors are undesirable for large AI workloads such as pre-training jobs.
-MLBatch includes and configures Coscheduler to enable gang scheduling and
-packing. Concretely, Coscheduler as configured will strive to schedule all pods
-in a job at once using a minimal number of nodes.
-
-## Example Workloads
-
-`PyTorchJobs`, `RayJobs`, and `RayClusters` may be submitted directly to
-MLBatch. Please note however that these workloads will not benefit from the
-advanced logic provided by `AppWrappers`, for instance pertaining to
-[fault-tolerance](#fault-tolerance). Hence, wrapping objects into `AppWrappers`
-is the recommended way of submitting workloads.
-
-### PyTorchJobs
-
-To submit an unwrapped `PyTorchJob` to MLBatch, simply include the queue name:
-```yaml
-apiVersion: kubeflow.org/v1
-kind: PyTorchJob
-metadata:
- name: sample-pytorchjob
- labels:
- kueue.x-k8s.io/queue-name: default-queue # queue name (required)
-spec:
- pytorchReplicaSpecs:
- Master:
- replicas: 1
- restartPolicy: OnFailure
- template:
- spec:
- containers:
- - name: pytorch
- image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v1beta1-fc858d1
- command:
- - "python3"
- - "/opt/pytorch-mnist/mnist.py"
- - "--epochs=1"
- resources:
- requests:
- cpu: 1
- Worker:
- replicas: 1
- restartPolicy: OnFailure
- template:
- spec:
- containers:
- - name: pytorch
- image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v1beta1-fc858d1
- command:
- - "python3"
- - "/opt/pytorch-mnist/mnist.py"
- - "--epochs=1"
- resources:
- requests:
- cpu: 1
-```
-Try the above with:
-```sh
-oc apply -n team1 -f samples/pytorchjob.yaml
-```
-MLBatch implicitly enables gang scheduling and packing for `PyTorchJobs` by
-configuring the Kubeflow Training Operator to automatically inject the
-necessary scheduling directives into all Pods it creates for `PyTorchJobs`.
-
-### AppWrappers
-
-A `Job`, a `Pod`, or a `Deployment` can be created using an `AppWrapper`, for
-example:
-```yaml
-apiVersion: workload.codeflare.dev/v1beta2
-kind: AppWrapper
-metadata:
- name: sample-job
-spec:
- components:
- - template:
- # job specification
- apiVersion: batch/v1
- kind: Job
- metadata:
- name: sample-job
- spec:
- template:
- spec:
- restartPolicy: Never
- containers:
- - name: busybox
- image: quay.io/project-codeflare/busybox:1.36
- command: ["sh", "-c", "sleep 30"]
- resources:
- requests:
- cpu: 1
-```
-Try the above with:
-```sh
-oc apply -n team1 -f samples/job.yaml
-```
-Concretely, the `AppWrapper` adds a simple prefix to the `Job` specification.
-See [AppWrappers](https://project-codeflare.github.io/appwrapper/) for more
-information and use cases.
-
-MLBatch implicitly enables packing for `AppWrappers`. For workloads consisting
-of multiple pods, add a `PodGroup` to enable gang scheduling, for instance:
-```yaml
-apiVersion: workload.codeflare.dev/v1beta2
-kind: AppWrapper
-metadata:
- name: sample-job
-spec:
- components:
- - template:
- # pod group specification
- apiVersion: scheduling.x-k8s.io/v1alpha1
- kind: PodGroup
- metadata:
- name: sample-job
- spec:
- minMember: 2 # replica count
- - template:
- # job specification
- apiVersion: batch/v1
- kind: Job
- metadata:
- name: sample-job
- spec:
- parallelism: 2 # replica count
- completions: 2 # replica count
- template:
- metadata:
- labels:
- scheduling.x-k8s.io/pod-group: sample-job # pod group label
- spec:
- restartPolicy: Never
- containers:
- - name: busybox
- image: quay.io/project-codeflare/busybox:1.36
- command: ["sh", "-c", "sleep 5"]
- resources:
- requests:
- cpu: 1
-```
-Try the above with:
-```sh
-oc apply -n team1 -f samples/job-with-podgroup.yaml
-```
-
-## Monitoring Workloads and Queues
-
-Check the status of the local queue for the namespace with:
-```sh
-oc -n team1 get localqueue
-```
-```
-NAME CLUSTERQUEUE PENDING WORKLOADS ADMITTED WORKLOADS
-localqueue.kueue.x-k8s.io/default-queue team1-cluster-queue 0 1
-```
-Check the status of the workloads in the namespace with:
-```sh
-oc -n team1 get workloads
-```
-```
-NAME QUEUE ADMITTED BY AGE
-pytorchjob-sample-pytorchjob-9fc41 default-queue team1-cluster-queue 11m
-```
-As usual, replace `get` with `describe` for more details on the local queue or
-workloads. See [Kueue](https://kueue.sigs.k8s.io) for more information.
-
-## Borrowing and Reclamation
-
-A workload can borrow unused quotas from other namespaces if not enough quota is
-available in the team namespace, unless disallowed by the `ClusterQueue` of the
-team namespace (`borrowingLimit`) or target namespace(s) (`lendingLimit`).
-
-Borrowed quotas are immediately returned to the target namespace(s) upon
-request. In other words, the submission of a workload in a target namespace will
-preempt borrowers if necessary to obtain the quota requested by the new
-workload.
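-
-Both limits are optional fields on the `ClusterQueue` resources managed by the
-cluster admins. A minimal sketch, with placeholder flavor and cohort names and
-illustrative values, showing where they appear:
-```yaml
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: ClusterQueue
-metadata:
-  name: team1-cluster-queue
-spec:
-  cohort: default-cohort # placeholder; borrowing happens within a cohort
-  resourceGroups:
-  - coveredResources: ["nvidia.com/gpu"]
-    flavors:
-    - name: default-flavor # placeholder ResourceFlavor name
-      resources:
-      - name: nvidia.com/gpu
-        nominalQuota: 8
-        borrowingLimit: 8 # borrow at most 8 additional GPUs from the cohort
-        lendingLimit: 4 # lend at most 4 idle GPUs to other cluster queues
-```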
-
-## Priorities and Preemption
-
-A workload can specify a priority by means of pod priorities, for instance for a
-wrapped job:
-```yaml
-apiVersion: workload.codeflare.dev/v1beta2
-kind: AppWrapper
-metadata:
- name: sample-job
-spec:
- components:
- - template:
- # job specification
- apiVersion: batch/v1
- kind: Job
- metadata:
- name: sample-job
- spec:
- template:
- spec:
- restartPolicy: Never
- priorityClassName: high-priority # workload priority
- containers:
- - name: busybox
- image: quay.io/project-codeflare/busybox:1.36
- command: ["sh", "-c", "sleep 5"]
- resources:
- requests:
- cpu: 1
-```
-Workloads of equal priority are considered for admission in submission order.
-Higher-priority workloads are considered for admission before lower-priority
-workloads irrespective of arrival time. However, workloads that cannot be
-admitted will not block the admission of newer and possibly lower-priority
-workloads (if they fit within the quota).
-
-A workload will preempt lower-priority workloads in the same namespace to meet
-its quota if necessary. It may also preempt newer, equal-priority workloads in
-the same namespace.
-
-Preemption across namespaces can only be triggered by the reclamation of
-borrowed quota, which is independent from priorities.
-
-## Fault-tolerance
-
-AppWrappers are the mechanism used by the MLBatch system to automate fault
-detection and retry/recovery of executing workloads. By adding automation, we
-can achieve higher levels of system utilization by greatly reducing the reliance
-on constant human monitoring of workload health. AppWrappers should be used to
-submit all workloads that are intended to run without close human supervision of
-their progress.
-
-```mermaid
----
-title: Overview of AppWrapper Fault Tolerance Phase Transitions
----
-stateDiagram-v2
-
- rn : Running
- s : Succeeded
- f : Failed
- rt : Resetting
- rs : Resuming
-
- %% Happy Path
- rn --> s
-
- %% Requeuing
- rn --> f : Retries Exceeded
- rn --> rt : Workload Unhealthy
- rt --> rs : All Resources Removed
- rs --> rn : All Resources Recreated
-
- classDef quota fill:lightblue
- class rs quota
- class rn quota
- class rt quota
-
- classDef failed fill:pink
- class f failed
-
- classDef succeeded fill:lightgreen
- class s succeeded
-```
-
-Throughout the execution of the workload, the AppWrapper controller
-monitors the number and health of the workload's Pods. It also watches
-the top-level created resources and for selected resource types
-understands how to interpret their status information. This information
-is combined to determine if a workload is unhealthy. A workload can be
-deemed *unhealthy* if any of the following conditions are true:
- + There are a non-zero number of `Failed` Pods.
- + It takes longer than `AdmissionGracePeriod` for the expected
- number of Pods to reach the `Pending` state.
- + It takes longer than the `WarmupGracePeriod` for the expected
- number of Pods to reach the `Running` state.
-  + A non-zero number of `Running` Pods are using resources
- that Autopilot has tagged as `NoExecute`.
- + The status information of a batch/v1 Job or PyTorchJob indicates
- that it has failed.
- + A top-level wrapped resource is externally deleted.
-
-If a workload is determined to be unhealthy by one of the first three
-Pod-level conditions above, the AppWrapper controller first waits for
-a `FailureGracePeriod` to allow the primary resource controller an
-opportunity to react and return the workload to a healthy state. The
-`FailureGracePeriod` is elided for the remaining conditions because the
-primary resource controller is not expected to take any further
-action. If the `FailureGracePeriod` passes and the workload is still
-unhealthy, the AppWrapper controller will *reset* the workload by
-deleting its resources, waiting for a `RetryPausePeriod`, and then
-creating new instances of the resources. During this retry pause, the
-AppWrapper **does not** release the workload's quota; this ensures
-that when the resources are recreated they will still have sufficient
-quota to execute. The number of times an AppWrapper is reset is
-tracked as part of its status; if the number of resets exceeds the
-`RetryLimit`, then the AppWrapper moves into a `Failed` state and its
-resources are deleted (thus finally releasing its quota). External deletion
-of a top-level wrapped resource will cause the AppWrapper to directly enter
-the `Failed` state independent of the `RetryLimit`.
-
-To support debugging `Failed` workloads, an annotation can be added to an
-AppWrapper that adds a `DeletionOnFailureGracePeriod` between the time the
-AppWrapper enters the `Failed` state and when the process of deleting its
-resources begins. Since the AppWrapper continues to consume quota during this
-delayed deletion period, this annotation should be used sparingly and only when
-interactive debugging of the failed workload is being actively pursued.
-
-All child resources for an AppWrapper that successfully completed will be
-automatically deleted once the `SuccessTTL` period has elapsed after the
-AppWrapper entered the `Succeeded` state.
-
-The parameters of the retry loop described above are configured at the system
-level, but can be customized by the user on a per-AppWrapper basis by adding
-annotations. The table below lists the parameters, gives their default, and the
-annotation that can be used to customize them. The MLBatch Helm chart also
-supports customizing these values.
-
-| Parameter | Default Value | Annotation |
-|------------------------------|---------------|------------------------------------------------------------------------|
-| AdmissionGracePeriod | 1 Minute | workload.codeflare.dev.appwrapper/admissionGracePeriodDuration |
-| WarmupGracePeriod | 5 Minutes | workload.codeflare.dev.appwrapper/warmupGracePeriodDuration |
-| FailureGracePeriod | 1 Minute | workload.codeflare.dev.appwrapper/failureGracePeriodDuration |
-| RetryPausePeriod | 90 Seconds | workload.codeflare.dev.appwrapper/retryPausePeriodDuration |
-| RetryLimit | 3 | workload.codeflare.dev.appwrapper/retryLimit |
-| DeletionOnFailureGracePeriod | 0 Seconds | workload.codeflare.dev.appwrapper/deletionOnFailureGracePeriodDuration |
-| SuccessTTL | 7 Days | workload.codeflare.dev.appwrapper/successTTLDuration |
-| GracePeriodMaximum | 24 Hours | Not Applicable |
-
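-For example, here is a sketch of the earlier sample AppWrapper with annotations
-that lower its retry limit and request a short debugging window after failure
-(the annotation values are arbitrary and for illustration only):
-```yaml
-apiVersion: workload.codeflare.dev/v1beta2
-kind: AppWrapper
-metadata:
-  name: sample-job
-  annotations:
-    # retry at most once instead of the default 3 times
-    workload.codeflare.dev.appwrapper/retryLimit: "1"
-    # keep resources for 10 minutes after failure to allow debugging
-    workload.codeflare.dev.appwrapper/deletionOnFailureGracePeriodDuration: 10m
-spec:
-  components:
-  - template:
-      apiVersion: batch/v1
-      kind: Job
-      metadata:
-        name: sample-job
-      spec:
-        template:
-          spec:
-            restartPolicy: Never
-            containers:
-            - name: busybox
-              image: quay.io/project-codeflare/busybox:1.36
-              command: ["sh", "-c", "sleep 30"]
-              resources:
-                requests:
-                  cpu: 1
-```
-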
-The `GracePeriodMaximum` imposes a system-wide upper limit on all other grace
-periods to limit the potential impact of user-added annotations on overall
-system utilization.
diff --git a/figures/CohortWithSlackCQ.png b/figures/CohortWithSlackCQ.png
deleted file mode 100644
index 43b88f6..0000000
Binary files a/figures/CohortWithSlackCQ.png and /dev/null differ
diff --git a/index.html b/index.html
new file mode 100644
index 0000000..a360974
--- /dev/null
+++ b/index.html
@@ -0,0 +1,10 @@
+
+
+
+
+
+This project's GitHub pages are only used as a Helm repository.
+
+Please see the GitHub Project for all user-facing content.
+
+
diff --git a/index.yaml b/index.yaml
new file mode 100644
index 0000000..85a5c93
--- /dev/null
+++ b/index.yaml
@@ -0,0 +1,125 @@
+apiVersion: v1
+entries:
+ pytorchjob-generator:
+ - apiVersion: v2
+ appVersion: v1beta2
+ created: "2025-04-17T15:21:49.560503007Z"
+ description: An AppWrapper generator for PyTorchJobs
+ digest: f337335a00711773647ad4badc94bf0fbd223d475f7c15c046da2b294dbea883
+ name: pytorchjob-generator
+ type: application
+ urls:
+ - pytorchjob-generator-1.1.9.tgz
+ version: 1.1.9
+ - apiVersion: v2
+ appVersion: v1beta2
+ created: "2025-03-11T15:02:32.514276254Z"
+ description: An AppWrapper generator for PyTorchJobs
+ digest: acc7b8406e7affb48ce042a26f95d22087e89731626eab6c69f95a9262778642
+ name: pytorchjob-generator
+ type: application
+ urls:
+ - pytorchjob-generator-1.1.8.tgz
+ version: 1.1.8
+ - apiVersion: v2
+ appVersion: v1beta2
+ created: "2025-01-17T16:39:09.676825318Z"
+ description: An AppWrapper generator for PyTorchJobs
+ digest: 11ffcfa4de8f8693555b589506176b6b5e2a853a095d5760b59e3f29499e1937
+ name: pytorchjob-generator
+ type: application
+ urls:
+ - pytorchjob-generator-1.1.7.tgz
+ version: 1.1.7
+ - apiVersion: v2
+ appVersion: v1beta2
+ created: "2024-12-19T18:08:41.657987953Z"
+ description: An AppWrapper generator for PyTorchJobs
+ digest: c234a289e554cfa242961aefe078d30f488acac4186e740acf877be4495c076b
+ name: pytorchjob-generator
+ type: application
+ urls:
+ - pytorchjob-generator-1.1.6.tgz
+ version: 1.1.6
+ - apiVersion: v2
+ appVersion: v1beta2
+ created: "2024-11-20T22:13:02.06061729Z"
+ description: An AppWrapper generator for PyTorchJobs
+ digest: 43fb65fed7977e694561a966e6627f356e614c28099635deeea414e5be520041
+ name: pytorchjob-generator
+ type: application
+ urls:
+ - pytorchjob-generator-1.1.5.tgz
+ version: 1.1.5
+ - apiVersion: v2
+ appVersion: v1beta2
+ created: "2024-11-19T21:41:47.613139069Z"
+ description: An AppWrapper generator for PyTorchJobs
+ digest: 71b89d5be657f20bf73e2e84080f48f508f77f03bbb804f1ce05c32e449eb082
+ name: pytorchjob-generator
+ type: application
+ urls:
+ - pytorchjob-generator-1.1.4.tgz
+ version: 1.1.4
+ - apiVersion: v2
+ appVersion: v1beta2
+ created: "2024-11-11T21:54:33.70476505Z"
+ description: An AppWrapper generator for PyTorchJobs
+ digest: 842697095572d3fc52eeb730eb400f638bd8090b9f114692f0a1c94fe64dd6bf
+ name: pytorchjob-generator
+ type: application
+ urls:
+ - pytorchjob-generator-1.1.3.tgz
+ version: 1.1.3
+ - apiVersion: v2
+ appVersion: v1beta2
+ created: "2024-10-22T21:33:02.486129435Z"
+ description: An AppWrapper generator for PyTorchJobs
+ digest: 4be744b1d7ecea211bec9c620dc78f9ccc5f6498d45ded22446989958d1634af
+ name: pytorchjob-generator
+ type: application
+ urls:
+ - pytorchjob-generator-1.1.2.tgz
+ version: 1.1.2
+ - apiVersion: v2
+ appVersion: v1beta2
+ created: "2024-10-02T16:03:18.169906585Z"
+ description: An AppWrapper generator for PyTorchJobs
+ digest: 9ab17b12786dfef7e4ef80d7e276c912f19951fbacb10bc96b3323d3039c3164
+ name: pytorchjob-generator
+ type: application
+ urls:
+ - pytorchjob-generator-1.1.1.tgz
+ version: 1.1.1
+ - apiVersion: v2
+ appVersion: v1beta2
+ created: "2024-09-17T18:17:07.36957093Z"
+ description: An AppWrapper generator for PyTorchJobs
+ digest: 143775437649ae7354ec3845f8f5bdc328334cb34cb2ab22daeb2afc20534252
+ name: pytorchjob-generator
+ type: application
+ urls:
+ - pytorchjob-generator-1.1.0.tgz
+ version: 1.1.0
+ - apiVersion: v2
+ appVersion: v1beta2
+ created: "2024-06-28T20:59:24.791117701Z"
+ description: An AppWrapper generator for PyTorchJobs
+ digest: 09bfee511e20c00ebfbcd0da2b225aa298ddeed08c851963f2c0330672e59995
+ name: pytorchjob-generator
+ type: application
+ urls:
+ - pytorchjob-generator-1.0.0.tgz
+ version: 1.0.0
+ sakkara-scheduler:
+ - apiVersion: v2
+ appVersion: v0.29.7
+ created: "2025-03-13T00:36:06.475815135Z"
+ description: Deploy sakkara group and topology aware scheduler plugin in a cluster
+ digest: 8cc9e150054292d005e923cbc684be42496defb73e9819152113e07590e7a57c
+ name: sakkara-scheduler
+ type: application
+ urls:
+ - sakkara-scheduler-0.0.1.tgz
+ version: 0.0.1
+generated: "2025-04-17T15:21:49.560549655Z"
diff --git a/pytorchjob-generator-1.0.0.tgz b/pytorchjob-generator-1.0.0.tgz
new file mode 100644
index 0000000..839204d
Binary files /dev/null and b/pytorchjob-generator-1.0.0.tgz differ
diff --git a/pytorchjob-generator-1.1.0.tgz b/pytorchjob-generator-1.1.0.tgz
new file mode 100644
index 0000000..ba17d8b
Binary files /dev/null and b/pytorchjob-generator-1.1.0.tgz differ
diff --git a/pytorchjob-generator-1.1.1.tgz b/pytorchjob-generator-1.1.1.tgz
new file mode 100644
index 0000000..8e3b2ff
Binary files /dev/null and b/pytorchjob-generator-1.1.1.tgz differ
diff --git a/pytorchjob-generator-1.1.2.tgz b/pytorchjob-generator-1.1.2.tgz
new file mode 100644
index 0000000..4d99578
Binary files /dev/null and b/pytorchjob-generator-1.1.2.tgz differ
diff --git a/pytorchjob-generator-1.1.3.tgz b/pytorchjob-generator-1.1.3.tgz
new file mode 100644
index 0000000..37b26c3
Binary files /dev/null and b/pytorchjob-generator-1.1.3.tgz differ
diff --git a/pytorchjob-generator-1.1.4.tgz b/pytorchjob-generator-1.1.4.tgz
new file mode 100644
index 0000000..0927475
Binary files /dev/null and b/pytorchjob-generator-1.1.4.tgz differ
diff --git a/pytorchjob-generator-1.1.5.tgz b/pytorchjob-generator-1.1.5.tgz
new file mode 100644
index 0000000..b9eab6b
Binary files /dev/null and b/pytorchjob-generator-1.1.5.tgz differ
diff --git a/pytorchjob-generator-1.1.6.tgz b/pytorchjob-generator-1.1.6.tgz
new file mode 100644
index 0000000..8023190
Binary files /dev/null and b/pytorchjob-generator-1.1.6.tgz differ
diff --git a/pytorchjob-generator-1.1.7.tgz b/pytorchjob-generator-1.1.7.tgz
new file mode 100644
index 0000000..d0cd79d
Binary files /dev/null and b/pytorchjob-generator-1.1.7.tgz differ
diff --git a/pytorchjob-generator-1.1.8.tgz b/pytorchjob-generator-1.1.8.tgz
new file mode 100644
index 0000000..4896149
Binary files /dev/null and b/pytorchjob-generator-1.1.8.tgz differ
diff --git a/pytorchjob-generator-1.1.9.tgz b/pytorchjob-generator-1.1.9.tgz
new file mode 100644
index 0000000..0c71eb1
Binary files /dev/null and b/pytorchjob-generator-1.1.9.tgz differ
diff --git a/sakkara-scheduler-0.0.1.tgz b/sakkara-scheduler-0.0.1.tgz
new file mode 100644
index 0000000..3e91b76
Binary files /dev/null and b/sakkara-scheduler-0.0.1.tgz differ
diff --git a/samples/job-with-podgroup.yaml b/samples/job-with-podgroup.yaml
deleted file mode 100644
index 653c2ad..0000000
--- a/samples/job-with-podgroup.yaml
+++ /dev/null
@@ -1,36 +0,0 @@
-apiVersion: workload.codeflare.dev/v1beta2
-kind: AppWrapper
-metadata:
- name: sample-job
-spec:
- components:
- - template:
- # pod group specification
- apiVersion: scheduling.x-k8s.io/v1alpha1
- kind: PodGroup
- metadata:
- name: sample-job
- spec:
- minMember: 2 # replica count
- - template:
- # job specification
- apiVersion: batch/v1
- kind: Job
- metadata:
- name: sample-job
- spec:
- parallelism: 2 # replica count
- completions: 2 # replica count
- template:
- metadata:
- labels:
- scheduling.x-k8s.io/pod-group: sample-job # pod group label
- spec:
- restartPolicy: Never
- containers:
- - name: busybox
- image: quay.io/project-codeflare/busybox:1.36
- command: ["sh", "-c", "sleep 5"]
- resources:
- requests:
- cpu: 1
diff --git a/samples/job.yaml b/samples/job.yaml
deleted file mode 100644
index 382c2aa..0000000
--- a/samples/job.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-apiVersion: workload.codeflare.dev/v1beta2
-kind: AppWrapper
-metadata:
- name: sample-job
-spec:
- components:
- - template:
- # job specification
- apiVersion: batch/v1
- kind: Job
- metadata:
- name: sample-job
- spec:
- template:
- spec:
- restartPolicy: Never
- containers:
- - name: busybox
- image: quay.io/project-codeflare/busybox:1.36
- command: ["sh", "-c", "sleep 5"]
- resources:
- requests:
- cpu: 1
diff --git a/samples/pod.yaml b/samples/pod.yaml
deleted file mode 100644
index 6406a93..0000000
--- a/samples/pod.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-apiVersion: workload.codeflare.dev/v1beta2
-kind: AppWrapper
-metadata:
- name: sample-pod
-spec:
- components:
- - template:
- # pod specification
- apiVersion: v1
- kind: Pod
- metadata:
- name: sample-pod
- spec:
- restartPolicy: Never
- containers:
- - name: busybox
- image: quay.io/project-codeflare/busybox:1.36
- command: ["sh", "-c", "sleep 5"]
- resources:
- requests:
- cpu: 1
diff --git a/samples/pytorchjob-in-appwrapper.yaml b/samples/pytorchjob-in-appwrapper.yaml
deleted file mode 100644
index 2ded053..0000000
--- a/samples/pytorchjob-in-appwrapper.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
-apiVersion: workload.codeflare.dev/v1beta2
-kind: AppWrapper
-metadata:
- name: sample-aw-pytorchjob
-spec:
- components:
- - template:
- # job specification
- apiVersion: kubeflow.org/v1
- kind: PyTorchJob
- metadata:
- name: sample-aw-pytorchjob
- spec:
- pytorchReplicaSpecs:
- Master:
- replicas: 1
- restartPolicy: OnFailure
- template:
- spec:
- containers:
- - name: pytorch
- image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v1beta1-fc858d1
- command:
- - "python3"
- - "/opt/pytorch-mnist/mnist.py"
- - "--epochs=1"
- resources:
- requests:
- cpu: 1
- Worker:
- replicas: 1
- restartPolicy: OnFailure
- template:
- spec:
- containers:
- - name: pytorch
- image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v1beta1-fc858d1
- command:
- - "python3"
- - "/opt/pytorch-mnist/mnist.py"
- - "--epochs=1"
- resources:
- requests:
- cpu: 1
diff --git a/samples/pytorchjob.yaml b/samples/pytorchjob.yaml
deleted file mode 100644
index 52f7bee..0000000
--- a/samples/pytorchjob.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-apiVersion: kubeflow.org/v1
-kind: PyTorchJob
-metadata:
- name: sample-pytorchjob
- labels:
- kueue.x-k8s.io/queue-name: default-queue # queue name (required)
-spec:
- pytorchReplicaSpecs:
- Master:
- replicas: 1
- restartPolicy: OnFailure
- template:
- spec:
- containers:
- - name: pytorch
- image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v1beta1-fc858d1
- command:
- - "python3"
- - "/opt/pytorch-mnist/mnist.py"
- - "--epochs=1"
- resources:
- requests:
- cpu: 1
- Worker:
- replicas: 1
- restartPolicy: OnFailure
- template:
- spec:
- containers:
- - name: pytorch
- image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v1beta1-fc858d1
- command:
- - "python3"
- - "/opt/pytorch-mnist/mnist.py"
- - "--epochs=1"
- resources:
- requests:
- cpu: 1
diff --git a/scheduler-plugins b/scheduler-plugins
deleted file mode 160000
index 96a3366..0000000
--- a/scheduler-plugins
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 96a33663d5e57edf09d4e2817f841f25caf60229
diff --git a/setup.KubeConEU25/README.md b/setup.KubeConEU25/README.md
deleted file mode 100644
index 03bb1bf..0000000
--- a/setup.KubeConEU25/README.md
+++ /dev/null
@@ -1,1122 +0,0 @@
-# MLBatch Tutorial
-
-MLBatch is the software stack we developed in IBM Research to facilitate the
-setup, administration, and use of Kubernetes clusters dedicated to batch AI/ML
-workloads. It leverages a number of community projects such as
-[Kueue](https://kueue.sigs.k8s.io), [Kubeflow
-Trainer](https://www.kubeflow.org/docs/components/training/),
-[KubeRay](https://docs.ray.io/en/latest/cluster/kubernetes/index.html), and
-[vLLM](https://docs.vllm.ai/en/latest/). It complements them with several
-open-source components born in IBM Research including
-[AutoPilot](https://github.com/IBM/autopilot),
-[AppWrapper](https://project-codeflare.github.io/appwrapper/), and
-[Sakkara](https://github.com/atantawi/4986-kep-sakkara). MLBatch manages teams,
-queues, quotas, and resource allocation. It monitors key cluster components,
-detecting faults and, to a degree, automating fault recovery.
-
-In this tutorial, we walk through all the steps necessary to set up MLBatch on a
-Kubernetes cluster and run a few example workloads.
-- We configure persistent storage using
-[NFS](https://en.wikipedia.org/wiki/Network_File_System).
-- We deploy MLBatch following the
- [CLUSTER-SETUP.md](../setup.k8s/CLUSTER-SETUP.md) instructions.
-- We configure example teams following the
- [TEAM-SETUP.md](../setup.k8s/TEAM-SETUP.md) instructions.
-- We reconfigure Autopilot to periodically assess the storage class in addition
- to running network and GPU tests. _This is optional._
-- We deploy [Prometheus](https://prometheus.io) and [Grafana
-dashboards](https://grafana.com/grafana/dashboards/) to monitor the health of
-the cluster and GPU utilization. _This is optional._
-- We demonstrate the queuing, quota management, and fault recovery capabilities
- of MLBatch using synthetic workloads.
-- We run example workloads using vLLM, PyTorch, and Ray.
-
-## Cluster Characteristics
-
-Our target cluster comprises three control plane nodes and three worker nodes
-running Kubernetes 1.29, specifically [OpenShift
-4.16](https://docs.openshift.com/container-platform/4.16/release_notes/ocp-4-16-release-notes.html).
-
-
-
-```sh
-kubectl get nodes
-```
-```
-NAME STATUS ROLES AGE VERSION
-pokprod-b93r38s3 Ready worker 5d13h v1.29.11+148a389
-pokprod-b93r39s2 Ready worker 5d12h v1.29.11+148a389
-pokprod-b93r44s0 Ready worker 5d13h v1.29.11+148a389
-pokprod002ctrl0 Ready control-plane,master 5d15h v1.29.11+148a389
-pokprod002ctrl1 Ready control-plane,master 5d15h v1.29.11+148a389
-pokprod002ctrl2 Ready control-plane,master 5d15h v1.29.11+148a389
-```
-Each worker node is equipped with eight [NVIDIA
-H100](https://www.nvidia.com/en-us/data-center/h100/) GPUs.
-```sh
-oc debug node/pokprod-b93r38s3 -- chroot /host lspci -d 10de:
-```
-```
-Starting pod/pokprod-b93r38s3-debug-4bv4j ...
-To use host binaries, run `chroot /host`
-05:00.0 Bridge: NVIDIA Corporation GH100 [H100 NVSwitch] (rev a1)
-06:00.0 Bridge: NVIDIA Corporation GH100 [H100 NVSwitch] (rev a1)
-07:00.0 Bridge: NVIDIA Corporation GH100 [H100 NVSwitch] (rev a1)
-08:00.0 Bridge: NVIDIA Corporation GH100 [H100 NVSwitch] (rev a1)
-18:00.0 3D controller: NVIDIA Corporation GH100 [H100 SXM5 80GB] (rev a1)
-2a:00.0 3D controller: NVIDIA Corporation GH100 [H100 SXM5 80GB] (rev a1)
-3a:00.0 3D controller: NVIDIA Corporation GH100 [H100 SXM5 80GB] (rev a1)
-5d:00.0 3D controller: NVIDIA Corporation GH100 [H100 SXM5 80GB] (rev a1)
-9a:00.0 3D controller: NVIDIA Corporation GH100 [H100 SXM5 80GB] (rev a1)
-ab:00.0 3D controller: NVIDIA Corporation GH100 [H100 SXM5 80GB] (rev a1)
-ba:00.0 3D controller: NVIDIA Corporation GH100 [H100 SXM5 80GB] (rev a1)
-db:00.0 3D controller: NVIDIA Corporation GH100 [H100 SXM5 80GB] (rev a1)
-
-Removing debug pod ...
-```
-For this tutorial, we assume the [NVIDIA GPU
-operator](https://docs.nvidia.com/datacenter/cloud-native/GPU-operator/latest/index.html)
-is already
-[installed](https://docs.nvidia.com/datacenter/cloud-native/GPU-operator/latest/getting-started.html)
-on the cluster. While this cluster is capable of [GPU-direct RDMA (GDR) with
-ROCE (RDMA over Converged
-Ethernet)](https://medium.com/@sunyanan.choochotkaew1/unlocking-GPUdirect-rdma-on-roce-in-kubernetes-based-cluster-on-cloud-through-multi-nic-cni-1e69ffb96296),
-we will not cover or rely on advanced networking configurations in this
-tutorial.
-```sh
-kubectl get operators -A
-```
-```
-NAME AGE
-gpu-operator-certified.nvidia-gpu-operator 18h
-nfd.openshift-nfd 18h
-```
-```sh
-kubectl get node pokprod-b93r38s3 -o yaml | yq .status.capacity
-```
-```
-cpu: "224"
-ephemeral-storage: 1873933640Ki
-hugepages-1Gi: "0"
-hugepages-2Mi: "0"
-memory: 2113411288Ki
-nvidia.com/gpu: "8"
-pods: "250"
-```
-
-
-
-
-## Persistent Storage Setup
-
-We assume storage is available by means of a preexisting
-[NFS](https://en.wikipedia.org/wiki/Network_File_System) server. We configure
-one storage class using the [NFS Subdir External
-Provisioner](https://github.com/kubernetes-sigs/nfs-subdir-external-provisioner).
-
-
-
-```sh
-helm repo add nfs-subdir-external-provisioner https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner
-helm repo update
-
-helm install -n nfs-provisioner pokprod nfs-subdir-external-provisioner/nfs-subdir-external-provisioner \
- --create-namespace \
- --set nfs.server=192.168.98.96 \
- --set nfs.path=/gpfs/fs_ec/pokprod002 \
- --set storageClass.name=nfs-client-pokprod \
- --set storageClass.provisionerName=k8s-sigs.io/pokprod-nfs-subdir-external-provisioner
-```
-Make sure to set the `nfs.server` and `nfs.path` values to the right values for
-your environment.
-```sh
-kubectl get storageclasses
-```
-```
-NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE
-nfs-client-pokprod k8s-sigs.io/pokprod-nfs-subdir-external-provisioner Delete Immediate true 11s
-```
-OpenShift clusters require an additional configuration step to permit the
-provisioner pod to mount the storage volume.
-```sh
-oc adm policy add-scc-to-user hostmount-anyuid \
- system:serviceaccount:nfs-provisioner:pokprod-nfs-subdir-external-provisioner
-```
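-
-As an optional sanity check, you can confirm that dynamic provisioning works end
-to end by creating a small test PVC and verifying that it binds (a sketch; the
-`nfs-test` namespace, claim name, and size are arbitrary):
-```sh
-kubectl create namespace nfs-test
-cat << EOF | kubectl apply -n nfs-test -f-
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: nfs-test-pvc
-spec:
-  accessModes: ["ReadWriteMany"]
-  storageClassName: nfs-client-pokprod
-  resources:
-    requests:
-      storage: 1Gi
-EOF
-# Expect STATUS Bound shortly, since the storage class binds immediately
-kubectl get pvc -n nfs-test nfs-test-pvc
-# Clean up the test namespace and claim
-kubectl delete namespace nfs-test
-```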
-
-
-
-## MLBatch Cluster Setup
-
-We deploy MLBatch to the cluster following
-[CLUSTER-SETUP.md](../setup.k8s/CLUSTER-SETUP.md).
-
-
-
-```sh
-# Clone MLBatch repository
-git clone --recursive https://github.com/project-codeflare/mlbatch.git
-cd mlbatch
-
-# Setup priority classes
-kubectl apply -f setup.k8s/mlbatch-priorities.yaml
-
-# Deploy scheduler-plugins
-helm install scheduler-plugins -n scheduler-plugins --create-namespace \
- scheduler-plugins/manifests/install/charts/as-a-second-scheduler/ \
- --set-json pluginConfig='[{"args":{"scoringStrategy":{"resources":[{"name":"nvidia.com/gpu","weight":1}],"requestedToCapacityRatio":{"shape":[{"utilization":0,"score":0},{"utilization":100,"score":10}]},"type":"RequestedToCapacityRatio"}},"name":"NodeResourcesFit"},{"args":{"permitWaitingTimeSeconds":300},"name":"Coscheduling"}]'
-
-# Patch scheduler-plugins pod priorities
-kubectl patch deployment -n scheduler-plugins --type=json \
- --patch-file setup.k8s/scheduler-priority-patch.yaml scheduler-plugins-controller
-kubectl patch deployment -n scheduler-plugins --type=json \
- --patch-file setup.k8s/scheduler-priority-patch.yaml scheduler-plugins-scheduler
-
-# Wait for scheduler-plugins pods to be ready
-kubectl -n scheduler-plugins wait --timeout=300s --for=condition=Available deployments --all
-
-# Create mlbatch-system namespace
-kubectl create namespace mlbatch-system
-
-# Deploy Kubeflow training operator
-kubectl apply --server-side -k setup.k8s/training-operator/coscheduling
-
-# Deploy KubeRay
-kubectl apply --server-side -k setup.k8s/kuberay
-
-# Deploy Kueue
-kubectl apply --server-side -k setup.k8s/kueue
-
-# Wait for Kueue to be ready
-kubectl -n mlbatch-system wait --timeout=300s --for=condition=Available deployments kueue-controller-manager
-
-# Deploy AppWrapper
-kubectl apply --server-side -k setup.k8s/appwrapper/coscheduling
-
-# Deploy Autopilot
-helm repo add autopilot https://ibm.github.io/autopilot/
-helm repo update
-
-helm upgrade -i autopilot -n autopilot autopilot/autopilot --create-namespace
-
-# Create Kueue's default flavor
-kubectl apply -f setup.k8s/default-flavor.yaml
-
-# Setup mlbatch-edit-role
-kubectl apply -f setup.k8s/mlbatch-edit-role.yaml
-```
-We reserve 8 GPUs out of 24 for MLBatch's slack queue.
-```yaml
-kubectl apply -f- << EOF
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: ClusterQueue
-metadata:
- name: slack-cluster-queue
-spec:
- namespaceSelector: {}
- cohort: default-cohort
- preemption:
- withinClusterQueue: LowerOrNewerEqualPriority
- reclaimWithinCohort: Any
- borrowWithinCohort:
- policy: Never
- resourceGroups:
- - coveredResources: ["cpu", "memory", "nvidia.com/gpu", "pods"]
- flavors:
- - name: default-flavor
- resources:
- - name: "cpu"
- nominalQuota: 224
- - name: "memory"
- nominalQuota: 2000G
- - name: "nvidia.com/gpu"
- nominalQuota: 8
- - name: "pods"
- nominalQuota: 100
-EOF
-```
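-
-To verify that the slack queue was created and to inspect its quota and current
-usage, you can list and describe the cluster queues (a sketch; output columns
-vary with the Kueue version):
-```sh
-kubectl get clusterqueues
-kubectl describe clusterqueue slack-cluster-queue
-```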
-
-
-
-## MLBatch Teams Setup
-
-We configure team `blue` with user `alice` and `red` with user `bob` following
-[TEAM-SETUP.md](../setup.k8s/TEAM-SETUP.md). Each team has a nominal quota of 8
-GPUs.
-
-
-
-For `alice` in team `blue`:
-```yaml
-# Create namespaces
-kubectl create ns blue
-
-# Label namespace
-kubectl label namespace blue mlbatch-team-namespace=true
-
-# Create cluster queue
-kubectl -n blue apply -f- << EOF
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: ClusterQueue
-metadata:
- name: blue-cluster-queue
-spec:
- namespaceSelector: {}
- cohort: default-cohort
- preemption:
- withinClusterQueue: LowerOrNewerEqualPriority
- reclaimWithinCohort: Any
- borrowWithinCohort:
- policy: Never
- resourceGroups:
- - coveredResources: ["cpu", "memory", "nvidia.com/gpu", "pods"]
- flavors:
- - name: default-flavor
- resources:
- - name: "cpu"
- nominalQuota: 224
- - name: "memory"
- nominalQuota: 2000G
- - name: "nvidia.com/gpu"
- nominalQuota: 8
- - name: "pods"
- nominalQuota: 100
-EOF
-
-# Create default queue for namespace
-kubectl apply -n blue -f- << EOF
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: LocalQueue
-metadata:
- name: default-queue
-spec:
- clusterQueue: blue-cluster-queue
-EOF
-
-# Authorize alice
-kubectl -n blue apply -f- << EOF
-kind: RoleBinding
-apiVersion: rbac.authorization.k8s.io/v1
-metadata:
- name: alice
-subjects:
- - apiGroup: rbac.authorization.k8s.io
- kind: User
- name: alice
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: mlbatch-edit
-EOF
-```
-For `bob` in team `red`:
-```yaml
-kubectl create ns red
-
-kubectl label namespace red mlbatch-team-namespace=true
-
-kubectl apply -n red -f- << EOF
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: ClusterQueue
-metadata:
- name: red-cluster-queue
-spec:
- namespaceSelector: {}
- cohort: default-cohort
- preemption:
- withinClusterQueue: LowerOrNewerEqualPriority
- reclaimWithinCohort: Any
- borrowWithinCohort:
- policy: Never
- resourceGroups:
- - coveredResources: ["cpu", "memory", "nvidia.com/gpu", "pods"]
- flavors:
- - name: default-flavor
- resources:
- - name: "cpu"
- nominalQuota: 224
- - name: "memory"
- nominalQuota: 2000G
- - name: "nvidia.com/gpu"
- nominalQuota: 8
- - name: "pods"
- nominalQuota: 100
-EOF
-
-kubectl apply -n red -f- << EOF
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: LocalQueue
-metadata:
- name: default-queue
-spec:
- clusterQueue: red-cluster-queue
-EOF
-
-kubectl -n red apply -f- << EOF
-kind: RoleBinding
-apiVersion: rbac.authorization.k8s.io/v1
-metadata:
- name: bob
-subjects:
- - apiGroup: rbac.authorization.k8s.io
- kind: User
- name: bob
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: mlbatch-edit
-EOF
-```
-While we gave permissions to Kubernetes users `alice` and `bob`, we have not
-tied these names to any identity provider as the details of this setup are not
-portable. In this tutorial, we will rely on [user
-impersonation](https://kubernetes.io/docs/reference/access-authn-authz/authentication/#user-impersonation)
-with `kubectl` to run as a specific user.
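-
-For example, to check what `alice` is allowed to do (a sketch; the exact answers
-depend on the `mlbatch-edit` role definition):
-```sh
-kubectl auth can-i create appwrappers -n blue --as alice   # expected: yes
-kubectl auth can-i create appwrappers -n red --as alice    # expected: no
-kubectl get pods -n blue --as alice
-```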
-
-
-
-## Extended Autopilot Setup
-
-Optionally, we configure Autopilot to test PVC creation and deletion with the
-`nfs-client-pokprod` storage class.
-
-
-
-First create the extended Autopilot configuration.
-```sh
-cat << EOF > autopilot-extended.yaml
-env:
- - name: "PERIODIC_CHECKS"
- value: "pciebw,remapped,dcgm,ping,gpupower,pvc"
- - name: "PVC_TEST_STORAGE_CLASS"
- value: "nfs-client-pokprod"
-EOF
-```
-Then reapply the Helm chart; this will start a rolling update.
-```sh
-helm upgrade -i autopilot autopilot/autopilot -n autopilot --create-namespace -f autopilot-extended.yaml
-```
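-
-You can watch the rollout complete with the commands below (a sketch; this
-assumes the default chart layout, which deploys Autopilot as a DaemonSet on the
-worker nodes):
-```sh
-kubectl get pods -n autopilot -w
-kubectl get daemonset -n autopilot
-```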
-
-
-
-## Monitoring Setup
-
-Optionally, we deploy [Prometheus](https://prometheus.io) and [Grafana
-dashboards](https://grafana.com/grafana/dashboards/) to the cluster.
-
-
-
-We follow the setup provided by the `prometheus-community/kube-prometheus-stack`
-Helm chart.
-
-```sh
-helm repo add prometheus-community https://prometheus-community.github.io/helm-charts && helm repo update
-```
-
-The chart will install Prometheus, Grafana, Alert Manager, Prometheus Node
-Exporter, and Kube State Metrics. We set up the chart with the following:
-
-- Persistent storage for Prometheus, Grafana, and Alert Manager;
-- Overriding the Prometheus Node Exporter port;
-- Disabling CRD creation, as the CRDs are already present.
-
-You may leave CRD creation enabled and keep the default Node Exporter settings;
-these overrides are only needed when deploying a separate Prometheus instance
-on an OpenShift cluster.
-
-```sh
-cat << EOF > config.yaml
-crds:
- enabled: false
-
-prometheus-node-exporter:
- service:
- port: 9110
-
-alertmanager:
- alertmanagerSpec:
- storage:
- volumeClaimTemplate:
- spec:
- storageClassName: nfs-client-pokprod
- accessModes: ["ReadWriteOnce"]
- resources:
- requests:
- storage: 50Gi
-
-prometheus:
- prometheusSpec:
- storageSpec:
- volumeClaimTemplate:
- spec:
- storageClassName: nfs-client-pokprod
- accessModes: ["ReadWriteOnce"]
- resources:
- requests:
- storage: 50Gi
- emptyDir:
- medium: Memory
-
-grafana:
- persistence:
- enabled: true
- type: sts
- storageClassName: "nfs-client-pokprod"
- accessModes:
- - ReadWriteOnce
- size: 20Gi
- finalizers:
- - kubernetes.io/pvc-protection
-EOF
-
-helm upgrade -i kube-prometheus-stack -n prometheus prometheus-community/kube-prometheus-stack --create-namespace -f config.yaml
-```
-
-If deploying on OpenShift-based systems, you need to grant the privileged
-security context constraint (SCC) to the service accounts created by the Helm chart.
-
-```sh
-oc adm policy add-scc-to-user privileged system:serviceaccount:prometheus:kube-prometheus-stack-admission system:serviceaccount:prometheus:kube-prometheus-stack-alertmanager system:serviceaccount:prometheus:kube-prometheus-stack-grafana system:serviceaccount:prometheus:kube-prometheus-stack-kube-state-metrics system:serviceaccount:prometheus:kube-prometheus-stack-operator system:serviceaccount:prometheus:kube-prometheus-stack-prometheus system:serviceaccount:prometheus:kube-prometheus-stack-prometheus-node-exporter
-```
-
-You should expect the following pods:
-
-```sh
-kubectl get pods
-```
-```sh
-NAME READY STATUS RESTARTS AGE
-alertmanager-kube-prometheus-stack-alertmanager-0 2/2 Running 0 16m
-kube-prometheus-stack-grafana-0 3/3 Running 0 16m
-kube-prometheus-stack-kube-state-metrics-6f76b98d89-pxs69 1/1 Running 0 16m
-kube-prometheus-stack-operator-7fbfc985bb-mm9bk 1/1 Running 0 16m
-kube-prometheus-stack-prometheus-node-exporter-44llp 1/1 Running 0 16m
-kube-prometheus-stack-prometheus-node-exporter-95gp8 1/1 Running 0 16m
-kube-prometheus-stack-prometheus-node-exporter-dxf5f 1/1 Running 0 16m
-kube-prometheus-stack-prometheus-node-exporter-f45dx 1/1 Running 0 16m
-kube-prometheus-stack-prometheus-node-exporter-pfrzk 1/1 Running 0 16m
-kube-prometheus-stack-prometheus-node-exporter-zpfzb 1/1 Running 0 16m
-prometheus-kube-prometheus-stack-prometheus-0 2/2 Running 0 16m
-```
-
-To access the Grafana dashboard on `localhost:3000`, first retrieve the admin password:
-
-```sh
-kubectl -n prometheus get secrets kube-prometheus-stack-grafana -o jsonpath="{.data.admin-password}" | base64 -d ; echo
-```
-
-Then port-forward the Grafana pod:
-```sh
-export POD_NAME=$(kubectl -n prometheus get pod -l "app.kubernetes.io/name=grafana,app.kubernetes.io/instance=kube-prometheus-stack" -oname)
-kubectl -n prometheus port-forward $POD_NAME 3000
-```
-
-To import the NVIDIA and Autopilot metrics dashboards, from the Grafana UI:
-
-- Select the `+` drop down menu on the top right, and **Import dashboard**
-- In the `Grafana.com dashboard URL or ID` box, add
- [https://grafana.com/grafana/dashboards/23123-autopilot-metrics/](https://grafana.com/grafana/dashboards/23123-autopilot-metrics/)
- and click Load, then repeat with the NVIDIA dashboard
- [https://grafana.com/grafana/dashboards/12239-nvidia-dcgm-exporter-dashboard/](https://grafana.com/grafana/dashboards/12239-nvidia-dcgm-exporter-dashboard/)
-
-To visualize the metrics, we need to label the service monitor objects in both
-`autopilot` and `nvidia-gpu-operator` namespaces with the Prometheus release
-name.
-
-```sh
-kubectl label servicemonitors.monitoring.coreos.com -n autopilot autopilot-metrics-monitor release=kube-prometheus-stack --overwrite
-```
-```sh
-kubectl label servicemonitors.monitoring.coreos.com -n nvidia-gpu-operator nvidia-dcgm-exporter gpu-operator nvidia-node-status-exporter release=kube-prometheus-stack --overwrite
-```
-
-
-
-## Workload Management
-
-We will now demonstrate the queuing, quota management, and fault recovery capabilities of MLBatch
-using synthetic workloads.
-
-
-For this portion of the tutorial, we will use variations on the simple batch/v1 Job shown below.
-All variations will create multiple pods, each requesting some number of GPUs, and sleep for
-a specified interval before completing successfully.
-
-```yaml
-apiVersion: workload.codeflare.dev/v1beta2
-kind: AppWrapper
-metadata:
- generateName:
- labels:
- kueue.x-k8s.io/queue-name: default-queue
-spec:
- components:
- - template:
- apiVersion: batch/v1
- kind: Job
- metadata:
- generateName:
- spec:
- completions:
- parallelism:
- template:
- spec:
- restartPolicy: Never
- terminationGracePeriodSeconds: 0
- priorityClassName:
- containers:
- - name: busybox
- image: quay.io/project-codeflare/busybox:1.36
- command: ["sh", "-c", "sleep 600"]
- resources:
- limits:
- nvidia.com/gpu: 4
-```
-
-We will use four types of jobs:
-
-| Job Type | Priority | Duration | Number of Pods | GPU Usage |
-|----------|----------|----------|----------------|------------|
-| short | normal | 30s | 2 | 2 X 4 = 8 |
-| normal | normal | 600s | 2 | 2 X 4 = 8 |
-| important| high | 600s | 2 | 2 x 4 = 8 |
-| large | normal | 600s | 4 | 4 x 4 = 16 |
-
-### Queuing
-
-First, Alice will submit a burst of short-running jobs that exceeds
-the number of available GPUs in the cluster. The excess jobs will be
-suspended by Kueue and admitted in turn as resources become available.
-
-```sh
-kubectl create -f ./setup.KubeConEU25/sample-jobs/short.yaml -n blue --as alice
-kubectl create -f ./setup.KubeConEU25/sample-jobs/short.yaml -n blue --as alice
-kubectl create -f ./setup.KubeConEU25/sample-jobs/short.yaml -n blue --as alice
-kubectl create -f ./setup.KubeConEU25/sample-jobs/short.yaml -n blue --as alice
-kubectl create -f ./setup.KubeConEU25/sample-jobs/short.yaml -n blue --as alice
-kubectl create -f ./setup.KubeConEU25/sample-jobs/short.yaml -n blue --as alice
-kubectl create -f ./setup.KubeConEU25/sample-jobs/short.yaml -n blue --as alice
-```
-
-Since no one else is using the cluster, Alice is able to use
-her blue team's quota of 8 GPUs and to borrow both the red team's 8 GPUs
-and the 8 GPUs allocated to the slack cluster queue. During this part of the demo,
-we will start with 3 admitted jobs and 5 pending jobs on the blue cluster queue. Over
-the next two minutes, the queue will drain as the short-running jobs complete and the
-next pending job is admitted.
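-
-To observe the queue draining, you can watch the AppWrappers and the Kueue
-workloads while the jobs run (a sketch; names, counts, and timings will differ
-from run to run):
-```sh
-kubectl get appwrappers -n blue --as alice -w
-kubectl get workloads -n blue
-```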
-
-### Borrowing and Preemption
-
-Alice will now submit 4 normal jobs. Again, with borrowing, three of these jobs
-will be able to run immediately and the 4th job will be queued.
-
-```sh
-kubectl create -f ./setup.KubeConEU25/sample-jobs/normal.yaml -n blue --as alice
-kubectl create -f ./setup.KubeConEU25/sample-jobs/normal.yaml -n blue --as alice
-kubectl create -f ./setup.KubeConEU25/sample-jobs/normal.yaml -n blue --as alice
-kubectl create -f ./setup.KubeConEU25/sample-jobs/normal.yaml -n blue --as alice
-```
-
-Alice can use priorities to ensure her important jobs run quickly.
-
-```sh
-kubectl create -f ./setup.KubeConEU25/sample-jobs/important.yaml -n blue --as alice
-```
-
-One of Alice's normal jobs is automatically suspended and put back on the queue of
-waiting jobs to make its resources available for her high-priority job.
-
-Finally, Bob on the red team arrives at work and submits two jobs.
-
-```sh
-kubectl create -f ./setup.KubeConEU25/sample-jobs/normal.yaml -n red --as bob
-kubectl create -f ./setup.KubeConEU25/sample-jobs/normal.yaml -n red --as bob
-```
-
-Kueue ensures that Bob has immediate access to his team's allocated quota
-by evicting borrowing jobs. One of Alice's running
-jobs is quickly suspended and returned to her team's queue of pending jobs.
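-
-The effect is easiest to see by listing the AppWrappers in both namespaces;
-admitted workloads report a running phase while the preempted one goes back to
-a suspended phase (a sketch; exact status strings depend on the AppWrapper
-version):
-```sh
-kubectl get appwrappers -n blue --as alice
-kubectl get appwrappers -n red --as bob
-```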
-
-### Fault Tolerance
-
-In this scenario, we will start fresh with an empty cluster. Alice will submit
-a single large job:
-
-```sh
-kubectl create -f ./setup.KubeConEU25/sample-jobs/large.yaml -n blue --as alice
-```
-
-After the job is running, we will simulate Autopilot detecting a serious GPU failure
-on one of the nodes by labeling that node:
-
-```sh
-kubectl label node <node-name> autopilot.ibm.com/gpuhealth=EVICT --overwrite
-```
-
-MLBatch will automatically trigger a reset of all running jobs with Pods on
-the impacted node. This reset first does a clean removal of all of the job's
-Pods and then creates fresh versions of them. Since MLBatch automatically injects
-the Kubernetes affinities shown below into all Pods it creates for user workloads,
-the Kubernetes scheduler will avoid scheduling the new Pods on the impacted Node.
-```yaml
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
-```
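-
-After the demo, you can clear the simulated failure so that the node becomes
-eligible for MLBatch workloads again (a sketch; this assumes no other component
-is managing the label):
-```sh
-# Remove the autopilot.ibm.com/gpuhealth label from the node
-kubectl label node <node-name> autopilot.ibm.com/gpuhealth-
-```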
-
-
-
-## Example Workloads
-
-We will now run some sample workloads that are representative of what runs
-on an AI GPU cluster.
-
-### Batch Inference with vLLM
-
-In this example, `alice` runs a batch inference workload using
-[vLLM](https://docs.vllm.ai/en/latest/) to serve IBM's
-[granite-3.2-8b-instruct](https://huggingface.co/ibm-granite/granite-3.2-8b-instruct)
-model.
-
-
-
-First, `alice` creates a persistent volume claim to cache the model weights on
-first invocation so that subsequent instantiations of the model will reuse the
-cached model weights.
-```yaml
-kubectl apply --as alice -n blue -f- << EOF
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
- name: granite-3.2-8b-instruct
-spec:
- accessModes:
- - ReadWriteMany
- resources:
- requests:
- storage: 50Gi
- storageClassName: nfs-client-pokprod
-EOF
-```
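-
-You can confirm that the claim is bound before launching the workload (a sketch):
-```sh
-kubectl get pvc granite-3.2-8b-instruct -n blue --as alice
-```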
-The workload wraps a Kubernetes Job in an AppWrapper. The Job consists of one
-Pod with two containers. The `vllm` container runs the inference runtime using
-an upstream `vllm-openai` image. The `load-generator` container submits a random
-series of requests to the inference runtime and reports a number of metrics such
-as _Time to First Token_ (TTFT) and _Time per Output Token_ (TPOT).
-```yaml
-kubectl apply --as alice -n blue -f- << EOF
-apiVersion: workload.codeflare.dev/v1beta2
-kind: AppWrapper
-metadata:
- name: batch-inference
-spec:
- components:
- - template:
- apiVersion: batch/v1
- kind: Job
- metadata:
- name: batch-inference
- spec:
- template:
- metadata:
- labels:
- app: batch-inference
- spec:
- restartPolicy: Never
- containers:
- - name: vllm
- image: quay.io/tardieu/vllm-openai:v0.7.3 # vllm/vllm-openai:v0.7.3
- command:
- # serve model and wait for halt signal
- - sh
- - -c
- - |
- vllm serve ibm-granite/granite-3.2-8b-instruct &
- until [ -f /.config/halt ]; do sleep 1; done
- ports:
- - containerPort: 8000
- resources:
- requests:
- cpu: 4
- memory: 64Gi
- nvidia.com/gpu: 1
- limits:
- cpu: 4
- memory: 64Gi
- nvidia.com/gpu: 1
- volumeMounts:
- - name: cache
- mountPath: /.cache
- - name: config
- mountPath: /.config
- - name: load-generator
- image: quay.io/tardieu/vllm-benchmarks:v0.7.3
- command:
- # wait for vllm, submit batch of requests, send halt signal
- - sh
- - -c
- - |
- until nc -zv localhost 8000; do sleep 1; done;
- python3 benchmark_serving.py \
- --model=ibm-granite/granite-3.2-8b-instruct \
- --backend=vllm \
- --dataset-name=random \
- --random-input-len=128 \
- --random-output-len=128 \
- --max-concurrency=16 \
- --num-prompts=512;
- touch /.config/halt
- volumeMounts:
- - name: cache
- mountPath: /.cache
- - name: config
- mountPath: /.config
- volumes:
- - name: cache
- persistentVolumeClaim:
- claimName: granite-3.2-8b-instruct
- - name: config
- emptyDir: {}
-EOF
-```
-The two containers are synchronized as follows: `load-generator` waits for
-`vllm` to be ready to accept requests and, upon completion of the batch, signals
-`vllm` to make it quit.
-
-Stream the logs of the `vllm` container with:
-```sh
-kubectl logs --as alice -n blue -l app=batch-inference -c vllm -f
-```
-Stream the logs of the `load-generator` container with:
-```sh
-kubectl logs --as alice -n blue -l app=batch-inference -c load-generator -f
-```
-Delete the complete workload with:
-```sh
-kubectl delete --as alice -n blue appwrapper batch-inference
-```
-
-
-
-### Pre-Training with PyTorch
-
-In this example, `alice` uses the [Kubeflow Trainer](https://github.com/kubeflow/trainer)
-to run a job that uses [PyTorch](https://pytorch.org) to train a machine learning model.
-
-
-
-This example was constructed by converting a [PyTorch tutorial on FSDP](https://pytorch.org/tutorials/intermediate/FSDP_tutorial.html)
-into a Kubeflow Trainer [notebook](./sample-jobs/pytorch-training.ipynb) that we used to generate
-the yaml for a `PyTorchJob`. The YAML generated by running the notebook was then put inside an
-`AppWrapper` using MLBatch's [awpack tool](../tools/appwrapper-packager/awpack.py) to produce the final
-[pytorch-training.yaml](sample-jobs/pytorch-training.yaml) that we will apply to run the workload.
-
-```sh
-kubectl apply --as alice -n blue -f ./setup.KubeConEU25/sample-jobs/pytorch-training.yaml
-```
-
-This will create 2 Pods, each requesting 2 GPUs. On our cluster, it will take about 30 seconds
-to execute this training workload. We can check on the status of the PyTorchJob by using the command:
-
-```sh
-kubectl get pytorchjob -n blue --watch
-```
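-
-You can also list the Pods created for the job (a sketch; with 2 workers the
-training operator creates one master Pod and one worker Pod):
-```sh
-kubectl get pods -n blue --as alice | grep mnist-training
-```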
-
-After the job completes, we can get the log of the worker Pod with:
-
-```sh
-kubectl logs mnist-training-worker-0 -n blue
-```
-
-At the beginning of the log we can see messages from each Python process
-with its rank information:
-```sh
-...
-FSDP Training for WORLD_SIZE: 4, RANK: 3, LOCAL_RANK: 1
-...
-FSDP Training for WORLD_SIZE: 4, RANK: 2, LOCAL_RANK: 0
-```
-And at the end of the log, we can see the messages from the `LOCAL_RANK` `0`
-process summarizing each epoch:
-```sh
-...
-
-Train Epoch: 1 Loss: 0.247396
-Test set: Average loss: 0.0498, Accuracy: 9824/10000 (98.24%)
-
-Train Epoch: 2 Loss: 0.070375
-Test set: Average loss: 0.0355, Accuracy: 9874/10000 (98.74%)
-
-Train Epoch: 3 Loss: 0.047944
-Test set: Average loss: 0.0291, Accuracy: 9900/10000 (99.00%)
-
-Train Epoch: 4 Loss: 0.038316
-Test set: Average loss: 0.0282, Accuracy: 9906/10000 (99.06%)
-
-Train Epoch: 5 Loss: 0.032751
-Test set: Average loss: 0.0276, Accuracy: 9906/10000 (99.06%)
-
-Train Epoch: 6 Loss: 0.028068
-Test set: Average loss: 0.0275, Accuracy: 9905/10000 (99.05%)
-
-Train Epoch: 7 Loss: 0.028161
-Test set: Average loss: 0.0254, Accuracy: 9916/10000 (99.16%)
-
-Train Epoch: 8 Loss: 0.025051
-Test set: Average loss: 0.0260, Accuracy: 9911/10000 (99.11%)
-
-Train Epoch: 9 Loss: 0.023851
-Test set: Average loss: 0.0264, Accuracy: 9916/10000 (99.16%)
-
-Train Epoch: 10 Loss: 0.023334
-Test set: Average loss: 0.0255, Accuracy: 9916/10000 (99.16%)
-```
-
-When we are all done, we can delete the completed `AppWrapper` with:
-
-```sh
- kubectl delete appwrapper pytorch-mnist-training -n blue
-```
-
-
-### Fine-Tuning with Ray
-
-In this example, `alice` uses [KubeRay](https://github.com/ray-project/kuberay)
-to run a job that uses [Ray](https://github.com/ray-project/ray) to fine tune a
-machine learning model.
-
-This workload is adapted from [this blog post by Red Hat](https://developers.redhat.com/articles/2024/09/30/fine-tune-llama-openshift-ai), which is in turn adapted from [an example in the Ray documentation](https://github.com/ray-project/ray/tree/master/doc/source/templates/04_finetuning_llms_with_deepspeed).
-The example fine-tunes Llama 3.1 with Ray, using DeepSpeed and LoRA.
-
-
-
-Let's set up the local environment by installing Ray; we will clone the repository with the fine-tuning workload in a later step.
-
-```bash
-uv venv myenv --python 3.12 --seed && source myenv/bin/activate && uv pip install ray datasets
-```
-
-We are going to impersonate Alice in this example.
-
-First, we create the PVC where we will download the model and save the checkpoints from the fine-tuning job. We call this PVC `finetuning-pvc` and reference it in the Ray cluster YAML below; if you use another name, update the `claimName` entries in the Ray cluster definition accordingly.
-
-```bash
-kubectl apply --as alice -n blue -f- << EOF
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
- name: finetuning-pvc
-spec:
- accessModes:
- - ReadWriteMany
- resources:
- requests:
- storage: 100Gi
- storageClassName: nfs-client-pokprod
-EOF
-```
-
-Now, let's create an AppWrapper version of the Ray cluster. Notice that:
-
-- We are using the container image `quay.io/rhoai/ray:2.35.0-py311-cu121-torch24-fa26` from Red Hat, but you can use the images from DockerHub if preferred
-- We are setting the number of worker replicas to `7`. Since we want to run on one GPU node, we assign one GPU to the Ray head pod and one GPU to each of the 7 worker pods, for a total of 8.
-
-```bash
-cd tools/appwrapper-packager/
-cat << EOF > ray.yaml
-apiVersion: ray.io/v1
-kind: RayCluster
-metadata:
- name: ray
-spec:
- headGroupSpec:
- enableIngress: false
- rayStartParams:
- block: 'true'
- dashboard-host: 0.0.0.0
- num-gpus: '1'
- resources: '"{}"'
- serviceType: ClusterIP
- template:
- metadata: {}
- spec:
- containers:
- - env:
- - name: MY_POD_IP
- valueFrom:
- fieldRef:
- fieldPath: status.podIP
- - name: RAY_USE_TLS
- value: '0'
- image: 'quay.io/rhoai/ray:2.35.0-py311-cu121-torch24-fa26'
- imagePullPolicy: Always
- lifecycle:
- preStop:
- exec:
- command:
- - /bin/sh
- - '-c'
- - ray stop
- name: ray-head
- ports:
- - containerPort: 6379
- name: gcs
- protocol: TCP
- - containerPort: 8265
- name: dashboard
- protocol: TCP
- - containerPort: 10001
- name: client
- protocol: TCP
- resources:
- limits:
- cpu: '16'
- memory: 256G
- nvidia.com/gpu: '1'
- requests:
- cpu: '16'
- memory: 128G
- nvidia.com/gpu: '1'
- volumeMounts:
- - mountPath: /model
- name: model
- volumes:
- - name: model
- persistentVolumeClaim:
- claimName: finetuning-pvc
- rayVersion: 2.35.0
- workerGroupSpecs:
- - groupName: small-group-ray
- rayStartParams:
- block: 'true'
- num-gpus: '1'
- resources: '"{}"'
- replicas: 7
- scaleStrategy: {}
- template:
- metadata: {}
- spec:
- containers:
- - env:
- - name: MY_POD_IP
- valueFrom:
- fieldRef:
- fieldPath: status.podIP
- - name: RAY_USE_TLS
- value: '0'
- image: 'quay.io/rhoai/ray:2.35.0-py311-cu121-torch24-fa26'
- imagePullPolicy: Always
- lifecycle:
- preStop:
- exec:
- command:
- - /bin/sh
- - '-c'
- - ray stop
- name: machine-learning
- resources:
- limits:
- cpu: '16'
- memory: 256G
- nvidia.com/gpu: '1'
- requests:
- cpu: '16'
- memory: 128G
- nvidia.com/gpu: '1'
- volumeMounts:
- - mountPath: /model
- name: model
- volumes:
- - name: model
- persistentVolumeClaim:
- claimName: finetuning-pvc
-EOF
-```
-
-Now let's use the tool to create the appwrapper:
-
-```bash
-./awpack.py -o ray-aw.yaml -n ray-appwrapper -i ray.yaml
-```
-
-Now we can submit the job while impersonating Alice:
-
-```bash
-kubectl create -f ray-aw.yaml -n blue --as alice
-```
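-
-Before submitting work, you can check that the RayCluster and its Pods are up
-(a sketch; once the AppWrapper is admitted, expect one head Pod and seven
-worker Pods):
-```sh
-kubectl get raycluster -n blue --as alice
-kubectl get pods -n blue --as alice
-```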
-
-Now that the Ray cluster is set up, we first need to expose the `ray-head` service, as that is the entrypoint for all job submissions. In another terminal, run:
-
-```bash
-kubectl port-forward svc/ray-head-svc 8265:8265 -n blue --as alice
-```
-
-Now we can clone the Git repository with the fine-tuning workload.
-
-```bash
-git clone https://github.com/opendatahub-io/distributed-workloads
-cd distributed-workloads/examples/ray-finetune-llm-deepspeed
-```
-
-We also create a Python program that launches the job in the Ray cluster using the Ray API.
-Notice that:
-
-- We set `--num-devices=8`, the total number of accelerators used by the head and the workers.
-- We set `HF_HOME` to the shared PVC, so the model is downloaded once and shared among all workers.
-- We set the number of epochs to just one for a shorter run.
-- We use localhost as the entry point for submitting Ray jobs, since we exposed the service earlier.
-
-```bash
-cat << EOF > finetuning.py
-import create_dataset
-create_dataset.gsm8k_qa_no_tokens_template()
-
-from ray.job_submission import JobSubmissionClient
-
-client = JobSubmissionClient("http://127.0.0.1:8265")
-
-kick_off_pytorch_benchmark = (
- "git clone https://github.com/opendatahub-io/distributed-workloads || true;"
- # Run the benchmark.
- "python ray_finetune_llm_deepspeed.py"
- " --model-name=meta-llama/Meta-Llama-3.1-8B --lora --num-devices=8 --num-epochs=1 --ds-config=./deepspeed_configs/zero_3_offload_optim_param.json --storage-path=/model/ --batch-size-per-device=32 --eval-batch-size-per-device=32"
-)
-
-
-submission_id = client.submit_job(
- entrypoint=kick_off_pytorch_benchmark,
- runtime_env={
- "env_vars": {
- 'HF_HOME': "/model/ray_finetune_llm_deepspeed/cache/",
- },
- 'pip': 'requirements.txt',
- 'working_dir': './',
- "excludes": ["/docs/", "*.ipynb", "*.md"]
- },
-)
-
-print("Use the following command to follow this Job's logs:")
-print(f"ray job logs '{submission_id}' --address http://127.0.0.1:8265 --follow")
-EOF
-python finetuning.py
-```
-The expected output is like the following:
-```bash
-2025-03-24 16:37:53,029 INFO dashboard_sdk.py:338 -- Uploading package gcs://_ray_pkg_21ddaa8b13d30deb.zip.
-2025-03-24 16:37:53,030 INFO packaging.py:575 -- Creating a file package for local module './'.
-Use the following command to follow this Job's logs:
-ray job logs 'raysubmit_C6hVCvdhpmapgQB8' --address http://127.0.0.1:8265 --follow
-```
-
-We can now either follow the logs in the terminal with the `ray job logs` command, or open the Ray dashboard at `http://localhost:8265` (reachable because we exposed the service earlier) and follow the job from there.
-
-Once the job is complete, the checkpoint with the fine-tuned model is saved in the folder
-```
-/model/meta-llama/Meta-Llama-3.1-8B/TorchTrainer_/TorchTrainer_/checkpoint_
-```
-
diff --git a/setup.KubeConEU25/UNINSTALL.md b/setup.KubeConEU25/UNINSTALL.md
deleted file mode 100644
index 77f9858..0000000
--- a/setup.KubeConEU25/UNINSTALL.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# Uninstall Procedure
-
-```sh
-kubectl delete appwrappers --all -A
-kubectl delete pvc --all -n blue
-kubectl delete pvc --all -n red
-
-kubectl delete clusterqueues --all -A
-kubectl delete localqueues --all -A
-kubectl delete flavors --all -A
-
-kubectl delete rolebinding -n blue alice
-kubectl delete rolebinding -n red bob
-kubectl delete ns blue red
-
-kubectl delete -k setup.k8s/appwrapper/base
-kubectl delete -k setup.k8s/kueue
-kubectl delete -k setup.k8s/kuberay
-kubectl delete -k setup.k8s/training-operator/base
-kubectl delete ns mlbatch-system
-kubectl delete clusterrole mlbatch-edit
-
-helm uninstall -n scheduler-plugins scheduler-plugins
-kubectl delete ns scheduler-plugins
-
-helm uninstall -n autopilot autopilot
-kubectl delete ns autopilot
-
-helm uninstall -n prometheus kube-prometheus-stack
-kubectl delete pvc -n prometheus --all
-kubectl delete ns prometheus
-
-helm uninstall -n nfs-provisioner pokprod
-kubectl delete ns nfs-provisioner
-
-# OpenShift-specific steps
-
-oc adm policy remove-scc-from-user hostmount-anyuid \
- system:serviceaccount:nfs-provisioner:pokprod-nfs-subdir-external-provisioner
-
-oc adm policy remove-scc-from-user privileged \
- system:serviceaccount:prometheus:kube-prometheus-stack-admission \
- system:serviceaccount:prometheus:kube-prometheus-stack-alertmanager \
- system:serviceaccount:prometheus:kube-prometheus-stack-grafana \
- system:serviceaccount:prometheus:kube-prometheus-stack-kube-state-metrics \
- system:serviceaccount:prometheus:kube-prometheus-stack-operator \
- system:serviceaccount:prometheus:kube-prometheus-stack-prometheus \
- system:serviceaccount:prometheus:kube-prometheus-stack-prometheus-node-exporter
-```
diff --git a/setup.KubeConEU25/sample-jobs/important.yaml b/setup.KubeConEU25/sample-jobs/important.yaml
deleted file mode 100644
index 2e95d2f..0000000
--- a/setup.KubeConEU25/sample-jobs/important.yaml
+++ /dev/null
@@ -1,28 +0,0 @@
-apiVersion: workload.codeflare.dev/v1beta2
-kind: AppWrapper
-metadata:
- generateName: important
- labels:
- kueue.x-k8s.io/queue-name: default-queue
-spec:
- components:
- - template:
- apiVersion: batch/v1
- kind: Job
- metadata:
- generateName: important
- spec:
- completions: 2
- parallelism: 2
- template:
- spec:
- restartPolicy: Never
- terminationGracePeriodSeconds: 0
- priorityClassName: high-priority
- containers:
- - name: busybox
- image: quay.io/project-codeflare/busybox:1.36
- command: ["sh", "-c", "sleep 600"]
- resources:
- limits:
- nvidia.com/gpu: 4
diff --git a/setup.KubeConEU25/sample-jobs/large.yaml b/setup.KubeConEU25/sample-jobs/large.yaml
deleted file mode 100644
index bd5255a..0000000
--- a/setup.KubeConEU25/sample-jobs/large.yaml
+++ /dev/null
@@ -1,29 +0,0 @@
-apiVersion: workload.codeflare.dev/v1beta2
-kind: AppWrapper
-metadata:
- generateName: large
- labels:
- kueue.x-k8s.io/queue-name: default-queue
- annotations:
- workload.codeflare.dev.appwrapper/retryPausePeriodDuration: 5s
-spec:
- components:
- - template:
- apiVersion: batch/v1
- kind: Job
- metadata:
- generateName: large
- spec:
- completions: 4
- parallelism: 4
- template:
- spec:
- restartPolicy: Never
- terminationGracePeriodSeconds: 0
- containers:
- - name: busybox
- image: quay.io/project-codeflare/busybox:1.36
- command: ["sh", "-c", "sleep 600"]
- resources:
- limits:
- nvidia.com/gpu: 4
diff --git a/setup.KubeConEU25/sample-jobs/normal.yaml b/setup.KubeConEU25/sample-jobs/normal.yaml
deleted file mode 100644
index b02f64d..0000000
--- a/setup.KubeConEU25/sample-jobs/normal.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-apiVersion: workload.codeflare.dev/v1beta2
-kind: AppWrapper
-metadata:
- generateName: normal
- labels:
- kueue.x-k8s.io/queue-name: default-queue
-spec:
- components:
- - template:
- apiVersion: batch/v1
- kind: Job
- metadata:
- generateName: normal
- spec:
- completions: 2
- parallelism: 2
- template:
- spec:
- restartPolicy: Never
- terminationGracePeriodSeconds: 0
- containers:
- - name: busybox
- image: quay.io/project-codeflare/busybox:1.36
- command: ["sh", "-c", "sleep 600"]
- resources:
- limits:
- nvidia.com/gpu: 4
diff --git a/setup.KubeConEU25/sample-jobs/pytorch-training.ipynb b/setup.KubeConEU25/sample-jobs/pytorch-training.ipynb
deleted file mode 100644
index 2fd86b6..0000000
--- a/setup.KubeConEU25/sample-jobs/pytorch-training.ipynb
+++ /dev/null
@@ -1,450 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Tune Model on MNIST dataset using PyTorchJob and FSDP"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "This Notebook will tune a small model on the MNIST dataset using FSDP.\n",
- "\n",
- "This Notebook will use **4** GPUs to train the model on 2 Nodes. This example is based on [the official PyTorch FSDP tutorial](https://pytorch.org/tutorials/intermediate/FSDP_tutorial.html)."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## FSDP with multi-node multi-worker training"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "This Notebook demonstrates multi-node, multi-worker distributed training with Fully Sharded Data Parallel (FSDP) and PyTorchJob.\n",
- "\n",
-    "When a model is trained with FSDP, the GPU memory footprint is smaller compared to Distributed Data Parallel (DDP),\n",
- "as the model parameters are sharded across GPU devices.\n",
- "\n",
- "This enables training of very large models that would otherwise be impossible to fit on a single GPU device.\n",
- "\n",
- "Check this guide to learn more about PyTorch FSDP: https://pytorch.org/tutorials/intermediate/FSDP_tutorial.html\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "tags": []
- },
- "source": [
- "## Install the required packages\n",
- "\n",
- "Install the Kubeflow Training Python SDK."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# TODO (andreyvelich): Use the release version of SDK.\n",
- "!pip install git+https://github.com/kubeflow/training-operator.git#subdirectory=sdk/python"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Create script to train using MNIST using FSDP\n",
- "\n",
- "We need to wrap our fine-tuning script in a function to create Kubeflow PyTorchJob."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "def train_function(parameters):\n",
- " import os\n",
- " import time\n",
- " import functools\n",
- "\n",
- " import torch\n",
- " import torch.nn as nn\n",
- " import torch.nn.functional as F\n",
- " import torch.optim as optim\n",
- " from torchvision import datasets, transforms\n",
- "\n",
- " from torch.optim.lr_scheduler import StepLR\n",
- "\n",
-    "    import torch.distributed as dist\n",
- " import torch.multiprocessing as mp\n",
- " from torch.nn.parallel import DistributedDataParallel as DDP\n",
- " from torch.utils.data.distributed import DistributedSampler\n",
- " from torch.distributed.fsdp import FullyShardedDataParallel as FSDP\n",
- " from torch.distributed.fsdp.fully_sharded_data_parallel import (\n",
- " CPUOffload,\n",
- " BackwardPrefetch,\n",
- " )\n",
- " from torch.distributed.fsdp.wrap import (\n",
- " size_based_auto_wrap_policy,\n",
- " enable_wrap,\n",
- " wrap,\n",
- " )\n",
- "\n",
- " class Net(nn.Module):\n",
- " def __init__(self):\n",
- " super(Net, self).__init__()\n",
- " self.conv1 = nn.Conv2d(1, 32, 3, 1)\n",
- " self.conv2 = nn.Conv2d(32, 64, 3, 1)\n",
- " self.dropout1 = nn.Dropout(0.25)\n",
- " self.dropout2 = nn.Dropout(0.5)\n",
- " self.fc1 = nn.Linear(9216, 128)\n",
- " self.fc2 = nn.Linear(128, 10)\n",
- "\n",
- " def forward(self, x):\n",
- "\n",
- " x = self.conv1(x)\n",
- " x = F.relu(x)\n",
- " x = self.conv2(x)\n",
- " x = F.relu(x)\n",
- " x = F.max_pool2d(x, 2)\n",
- " x = self.dropout1(x)\n",
- " x = torch.flatten(x, 1)\n",
- " x = self.fc1(x)\n",
- " x = F.relu(x)\n",
- " x = self.dropout2(x)\n",
- " x = self.fc2(x)\n",
- " output = F.log_softmax(x, dim=1)\n",
- " return output\n",
- " \n",
- "\n",
- " def train(args, model, rank, world_size, train_loader, optimizer, epoch, sampler=None):\n",
- " model.train()\n",
- " ddp_loss = torch.zeros(2).to(rank)\n",
- " if sampler:\n",
- " sampler.set_epoch(epoch)\n",
- " for batch_idx, (data, target) in enumerate(train_loader):\n",
- " data, target = data.to(rank), target.to(rank)\n",
- " optimizer.zero_grad()\n",
- " output = model(data)\n",
- " loss = F.nll_loss(output, target, reduction='sum')\n",
- " loss.backward()\n",
- " optimizer.step()\n",
- " ddp_loss[0] += loss.item()\n",
- " ddp_loss[1] += len(data)\n",
- "\n",
- " dist.all_reduce(ddp_loss, op=dist.ReduceOp.SUM)\n",
- " if rank == 0:\n",
- " print('Train Epoch: {} \\tLoss: {:.6f}'.format(epoch, ddp_loss[0] / ddp_loss[1]))\n",
- " \n",
- " def test(model, rank, world_size, test_loader):\n",
- " model.eval()\n",
- " correct = 0\n",
- " ddp_loss = torch.zeros(3).to(rank)\n",
- " with torch.no_grad():\n",
- " for data, target in test_loader:\n",
- " data, target = data.to(rank), target.to(rank)\n",
- " output = model(data)\n",
- " ddp_loss[0] += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss\n",
- " pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability\n",
- " ddp_loss[1] += pred.eq(target.view_as(pred)).sum().item()\n",
- " ddp_loss[2] += len(data)\n",
- "\n",
- " dist.all_reduce(ddp_loss, op=dist.ReduceOp.SUM)\n",
- "\n",
- " if rank == 0:\n",
- " test_loss = ddp_loss[0] / ddp_loss[2]\n",
- " print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\\n'.format(\n",
- " test_loss, int(ddp_loss[1]), int(ddp_loss[2]),\n",
- " 100. * ddp_loss[1] / ddp_loss[2]))\n",
- "\n",
- "\n",
- " # [1] Setup PyTorch distributed and get the distributed parameters.\n",
- " torch.manual_seed(parameters[\"seed\"])\n",
- " dist.init_process_group(\"nccl\")\n",
- " local_rank = int(os.environ[\"LOCAL_RANK\"])\n",
- " rank = dist.get_rank()\n",
- " world_size = dist.get_world_size()\n",
- "\n",
- " # Local rank identifies the GPU number inside the pod.\n",
- " torch.cuda.set_device(local_rank)\n",
- "\n",
- " print(\n",
- " f\"FSDP Training for WORLD_SIZE: {world_size}, RANK: {rank}, LOCAL_RANK: {local_rank}\"\n",
- " )\n",
- "\n",
- " transform=transforms.Compose([\n",
- " transforms.ToTensor(),\n",
- " transforms.Normalize((0.1307,), (0.3081,))\n",
- " ])\n",
- "\n",
- " dataset1 = datasets.MNIST('../data', train=True, download=True,\n",
- " transform=transform)\n",
- " dataset2 = datasets.MNIST('../data', train=False,\n",
- " transform=transform)\n",
- "\n",
- " sampler1 = DistributedSampler(dataset1, rank=rank, num_replicas=world_size, shuffle=True)\n",
- " sampler2 = DistributedSampler(dataset2, rank=rank, num_replicas=world_size)\n",
- "\n",
- " train_kwargs = {'batch_size': parameters[\"batch-size\"], 'sampler': sampler1}\n",
- " test_kwargs = {'batch_size': parameters[\"test-batch-size\"], 'sampler': sampler2}\n",
- " cuda_kwargs = {'num_workers': 2,\n",
- " 'pin_memory': True,\n",
- " 'shuffle': False}\n",
- " train_kwargs.update(cuda_kwargs)\n",
- " test_kwargs.update(cuda_kwargs)\n",
- "\n",
- " train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)\n",
- " test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)\n",
- " my_auto_wrap_policy = functools.partial(\n",
- " size_based_auto_wrap_policy, min_num_params=100\n",
- " )\n",
- "\n",
- " init_start_event = torch.cuda.Event(enable_timing=True)\n",
- " init_end_event = torch.cuda.Event(enable_timing=True)\n",
- "\n",
- " model = Net().to(local_rank)\n",
- "\n",
- " model = FSDP(model)\n",
- "\n",
- " optimizer = optim.Adadelta(model.parameters(), lr=parameters[\"lr\"])\n",
- "\n",
- " scheduler = StepLR(optimizer, step_size=1, gamma=parameters[\"gamma\"])\n",
- " init_start_event.record()\n",
- " for epoch in range(1, parameters[\"epochs\"] + 1):\n",
- " train(parameters, model, local_rank, world_size, train_loader, optimizer, epoch, sampler=sampler1)\n",
- " test(model, local_rank, world_size, test_loader)\n",
- " scheduler.step()\n",
- "\n",
- " init_end_event.record()\n",
- "\n",
- " if rank == 0:\n",
- " init_end_event.synchronize()\n",
- " print(f\"CUDA event elapsed time: {init_start_event.elapsed_time(init_end_event) / 1000}sec\")\n",
- " print(f\"{model}\")\n",
- "\n",
- " if parameters[\"save-model\"]:\n",
- " # use a barrier to make sure training is done on all ranks\n",
- " dist.barrier()\n",
- " states = model.state_dict()\n",
- " if rank == 0:\n",
- " torch.save(states, \"mnist_cnn.pt\")\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Create Kubeflow PyTorchJob to train on MNIST with FSDP\n",
- "\n",
-    "Use `TrainingClient()` to create a PyTorchJob which will train on **2 workers** using **2 GPUs** for each worker.\n",
-    "\n",
-    "If you don't have enough GPU resources, you can decrease the number of workers or the number of GPUs per worker."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "from kubeflow.training import TrainingClient\n",
- "\n",
- "job_name = \"mnist-training\"\n",
- "\n",
- "parameters = {\n",
- " \"batch-size\": 64,\n",
- " \"test-batch-size\": 1000,\n",
- " \"epochs\": 10,\n",
- " \"lr\": 1.0,\n",
- " \"gamma\": 0.7,\n",
- " \"seed\": 1,\n",
- " \"save-model\": False,\n",
- "}\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# Create the PyTorchJob.\n",
- "TrainingClient().create_job(\n",
- " name=job_name,\n",
- " train_func=train_function,\n",
- " parameters=parameters,\n",
- " num_workers=2, # You can modify number of workers or number of GPUs.\n",
- " num_procs_per_worker=2,\n",
- " resources_per_worker={\"gpu\": 2},\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "tags": []
- },
- "source": [
- "### Check the PyTorchJob conditions\n",
- "\n",
- "Use `TrainingClient()` APIs to get information about created PyTorchJob."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "print(\"PyTorchJob Conditions\")\n",
- "print(TrainingClient().get_job_conditions(job_name))\n",
- "print(\"-\" * 40)\n",
- "\n",
- "# Wait until PyTorchJob has the Running condition.\n",
- "job = TrainingClient().wait_for_job_conditions(\n",
- " job_name,\n",
- " expected_conditions={\"Running\"},\n",
- ")\n",
- "print(\"PyTorchJob is running\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Get the PyTorchJob pod names\n",
- "\n",
- "Since we define 2 workers, PyTorchJob will create 1 master pod and 1 worker pod to run FSDP fine-tuning."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {
- "tags": []
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['mnist-training-master-0', 'mnist-training-worker-0']"
- ]
- },
- "execution_count": 21,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "TrainingClient().get_job_pod_names(job_name)\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "execution": {
- "iopub.status.busy": "2022-09-01T20:10:25.759950Z",
- "iopub.status.idle": "2022-09-01T20:10:25.760581Z",
- "shell.execute_reply": "2022-09-01T20:10:25.760353Z",
- "shell.execute_reply.started": "2022-09-01T20:10:25.760328Z"
- },
- "tags": []
- },
- "source": [
- "### Get the PyTorchJob training logs\n",
- "\n",
- "Model parameters are sharded across all workers and GPU devices."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "logs, _ = TrainingClient().get_job_logs(job_name, follow=True)\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "execution": {
- "iopub.execute_input": "2024-03-01T23:44:15.511173Z",
- "iopub.status.busy": "2024-03-01T23:44:15.510932Z",
- "iopub.status.idle": "2024-03-01T23:44:15.539921Z",
- "shell.execute_reply": "2024-03-01T23:44:15.539352Z",
- "shell.execute_reply.started": "2024-03-01T23:44:15.511155Z"
- },
- "tags": []
- },
- "source": [
- "## Delete the PyTorchJob\n",
- "\n",
- "You can delete the created PyTorchJob."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "TrainingClient().delete_job(name=job_name)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "pt-demo",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.12.9"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/setup.KubeConEU25/sample-jobs/pytorch-training.yaml b/setup.KubeConEU25/sample-jobs/pytorch-training.yaml
deleted file mode 100644
index fc063e1..0000000
--- a/setup.KubeConEU25/sample-jobs/pytorch-training.yaml
+++ /dev/null
@@ -1,346 +0,0 @@
-apiVersion: workload.codeflare.dev/v1beta2
-kind: AppWrapper
-metadata:
- name: pytorch-mnist-training
- labels:
- kueue.x-k8s.io/queue-name: default-queue
-spec:
- components:
- - template:
- apiVersion: kubeflow.org/v1
- kind: PyTorchJob
- metadata:
- name: mnist-training
- spec:
- nprocPerNode: "2"
- pytorchReplicaSpecs:
- Master:
- replicas: 1
- template:
- metadata:
- annotations:
- sidecar.istio.io/inject: "false"
- spec:
- containers:
- - args:
- - |2-
- program_path=$(mktemp -d)
- read -r -d '' SCRIPT << EOM
- def train_function(parameters):
- import os
- import time
- import functools
- import torch
- import torch.nn as nn
- import torch.nn.functional as F
- import torch.optim as optim
- from torchvision import datasets, transforms
- from torch.optim.lr_scheduler import StepLR
- import torch.distributed as dist
- import torch.distributed as dist
- import torch.multiprocessing as mp
- from torch.nn.parallel import DistributedDataParallel as DDP
- from torch.utils.data.distributed import DistributedSampler
- from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
- from torch.distributed.fsdp.fully_sharded_data_parallel import (
- CPUOffload,
- BackwardPrefetch,
- )
- from torch.distributed.fsdp.wrap import (
- size_based_auto_wrap_policy,
- enable_wrap,
- wrap,
- )
- class Net(nn.Module):
- def __init__(self):
- super(Net, self).__init__()
- self.conv1 = nn.Conv2d(1, 32, 3, 1)
- self.conv2 = nn.Conv2d(32, 64, 3, 1)
- self.dropout1 = nn.Dropout(0.25)
- self.dropout2 = nn.Dropout(0.5)
- self.fc1 = nn.Linear(9216, 128)
- self.fc2 = nn.Linear(128, 10)
- def forward(self, x):
- x = self.conv1(x)
- x = F.relu(x)
- x = self.conv2(x)
- x = F.relu(x)
- x = F.max_pool2d(x, 2)
- x = self.dropout1(x)
- x = torch.flatten(x, 1)
- x = self.fc1(x)
- x = F.relu(x)
- x = self.dropout2(x)
- x = self.fc2(x)
- output = F.log_softmax(x, dim=1)
- return output
- def train(args, model, rank, world_size, train_loader, optimizer, epoch, sampler=None):
- model.train()
- ddp_loss = torch.zeros(2).to(rank)
- if sampler:
- sampler.set_epoch(epoch)
- for batch_idx, (data, target) in enumerate(train_loader):
- data, target = data.to(rank), target.to(rank)
- optimizer.zero_grad()
- output = model(data)
- loss = F.nll_loss(output, target, reduction='sum')
- loss.backward()
- optimizer.step()
- ddp_loss[0] += loss.item()
- ddp_loss[1] += len(data)
- dist.all_reduce(ddp_loss, op=dist.ReduceOp.SUM)
- if rank == 0:
- print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, ddp_loss[0] / ddp_loss[1]))
- def test(model, rank, world_size, test_loader):
- model.eval()
- correct = 0
- ddp_loss = torch.zeros(3).to(rank)
- with torch.no_grad():
- for data, target in test_loader:
- data, target = data.to(rank), target.to(rank)
- output = model(data)
- ddp_loss[0] += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
- pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
- ddp_loss[1] += pred.eq(target.view_as(pred)).sum().item()
- ddp_loss[2] += len(data)
- dist.all_reduce(ddp_loss, op=dist.ReduceOp.SUM)
- if rank == 0:
- test_loss = ddp_loss[0] / ddp_loss[2]
- print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
- test_loss, int(ddp_loss[1]), int(ddp_loss[2]),
- 100. * ddp_loss[1] / ddp_loss[2]))
- # [1] Setup PyTorch distributed and get the distributed parameters.
- torch.manual_seed(parameters["seed"])
- dist.init_process_group("nccl")
- local_rank = int(os.environ["LOCAL_RANK"])
- rank = dist.get_rank()
- world_size = dist.get_world_size()
- # Local rank identifies the GPU number inside the pod.
- torch.cuda.set_device(local_rank)
- print(
- f"FSDP Training for WORLD_SIZE: {world_size}, RANK: {rank}, LOCAL_RANK: {local_rank}"
- )
- transform=transforms.Compose([
- transforms.ToTensor(),
- transforms.Normalize((0.1307,), (0.3081,))
- ])
- dataset1 = datasets.MNIST('/tmp/data', train=True, download=True,
- transform=transform)
- dataset2 = datasets.MNIST('/tmp/data', train=False,
- transform=transform)
- sampler1 = DistributedSampler(dataset1, rank=rank, num_replicas=world_size, shuffle=True)
- sampler2 = DistributedSampler(dataset2, rank=rank, num_replicas=world_size)
- train_kwargs = {'batch_size': parameters["batch-size"], 'sampler': sampler1}
- test_kwargs = {'batch_size': parameters["test-batch-size"], 'sampler': sampler2}
- cuda_kwargs = {'num_workers': 2,
- 'pin_memory': True,
- 'shuffle': False}
- train_kwargs.update(cuda_kwargs)
- test_kwargs.update(cuda_kwargs)
- train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)
- test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
- my_auto_wrap_policy = functools.partial(
- size_based_auto_wrap_policy, min_num_params=100
- )
- init_start_event = torch.cuda.Event(enable_timing=True)
- init_end_event = torch.cuda.Event(enable_timing=True)
- model = Net().to(local_rank)
- model = FSDP(model)
- optimizer = optim.Adadelta(model.parameters(), lr=parameters["lr"])
- scheduler = StepLR(optimizer, step_size=1, gamma=parameters["gamma"])
- init_start_event.record()
- for epoch in range(1, parameters["epochs"] + 1):
- train(parameters, model, local_rank, world_size, train_loader, optimizer, epoch, sampler=sampler1)
- test(model, local_rank, world_size, test_loader)
- scheduler.step()
- init_end_event.record()
- if rank == 0:
- init_end_event.synchronize()
- print(f"CUDA event elapsed time: {init_start_event.elapsed_time(init_end_event) / 1000}sec")
- print(f"{model}")
- if parameters["save-model"]:
- # use a barrier to make sure training is done on all ranks
- dist.barrier()
- states = model.state_dict()
- if rank == 0:
- torch.save(states, "mnist_cnn.pt")
- train_function({'batch-size': 64, 'test-batch-size': 1000, 'epochs': 10, 'lr': 1.0, 'gamma': 0.7, 'seed': 1, 'save-model': False})
- EOM
- printf "%s" "$SCRIPT" > "$program_path/ephemeral_script.py"
- torchrun "$program_path/ephemeral_script.py"
- command:
- - bash
- - -c
- image: docker.io/pytorch/pytorch:2.1.2-cuda11.8-cudnn8-runtime
- name: pytorch
- resources:
- limits:
- nvidia.com/gpu: "2"
- requests:
- nvidia.com/gpu: "2"
- Worker:
- replicas: 1
- template:
- metadata:
- annotations:
- sidecar.istio.io/inject: "false"
- spec:
- containers:
- - args:
- - |2-
- program_path=$(mktemp -d)
- read -r -d '' SCRIPT << EOM
- def train_function(parameters):
- import os
- import time
- import functools
- import torch
- import torch.nn as nn
- import torch.nn.functional as F
- import torch.optim as optim
- from torchvision import datasets, transforms
- from torch.optim.lr_scheduler import StepLR
- import torch.distributed as dist
- import torch.distributed as dist
- import torch.multiprocessing as mp
- from torch.nn.parallel import DistributedDataParallel as DDP
- from torch.utils.data.distributed import DistributedSampler
- from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
- from torch.distributed.fsdp.fully_sharded_data_parallel import (
- CPUOffload,
- BackwardPrefetch,
- )
- from torch.distributed.fsdp.wrap import (
- size_based_auto_wrap_policy,
- enable_wrap,
- wrap,
- )
- class Net(nn.Module):
- def __init__(self):
- super(Net, self).__init__()
- self.conv1 = nn.Conv2d(1, 32, 3, 1)
- self.conv2 = nn.Conv2d(32, 64, 3, 1)
- self.dropout1 = nn.Dropout(0.25)
- self.dropout2 = nn.Dropout(0.5)
- self.fc1 = nn.Linear(9216, 128)
- self.fc2 = nn.Linear(128, 10)
- def forward(self, x):
- x = self.conv1(x)
- x = F.relu(x)
- x = self.conv2(x)
- x = F.relu(x)
- x = F.max_pool2d(x, 2)
- x = self.dropout1(x)
- x = torch.flatten(x, 1)
- x = self.fc1(x)
- x = F.relu(x)
- x = self.dropout2(x)
- x = self.fc2(x)
- output = F.log_softmax(x, dim=1)
- return output
- def train(args, model, rank, world_size, train_loader, optimizer, epoch, sampler=None):
- model.train()
- ddp_loss = torch.zeros(2).to(rank)
- if sampler:
- sampler.set_epoch(epoch)
- for batch_idx, (data, target) in enumerate(train_loader):
- data, target = data.to(rank), target.to(rank)
- optimizer.zero_grad()
- output = model(data)
- loss = F.nll_loss(output, target, reduction='sum')
- loss.backward()
- optimizer.step()
- ddp_loss[0] += loss.item()
- ddp_loss[1] += len(data)
- dist.all_reduce(ddp_loss, op=dist.ReduceOp.SUM)
- if rank == 0:
- print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, ddp_loss[0] / ddp_loss[1]))
- def test(model, rank, world_size, test_loader):
- model.eval()
- correct = 0
- ddp_loss = torch.zeros(3).to(rank)
- with torch.no_grad():
- for data, target in test_loader:
- data, target = data.to(rank), target.to(rank)
- output = model(data)
- ddp_loss[0] += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
- pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
- ddp_loss[1] += pred.eq(target.view_as(pred)).sum().item()
- ddp_loss[2] += len(data)
- dist.all_reduce(ddp_loss, op=dist.ReduceOp.SUM)
- if rank == 0:
- test_loss = ddp_loss[0] / ddp_loss[2]
- print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
- test_loss, int(ddp_loss[1]), int(ddp_loss[2]),
- 100. * ddp_loss[1] / ddp_loss[2]))
- # [1] Setup PyTorch distributed and get the distributed parameters.
- torch.manual_seed(parameters["seed"])
- dist.init_process_group("nccl")
- local_rank = int(os.environ["LOCAL_RANK"])
- rank = dist.get_rank()
- world_size = dist.get_world_size()
- # Local rank identifies the GPU number inside the pod.
- torch.cuda.set_device(local_rank)
- print(
- f"FSDP Training for WORLD_SIZE: {world_size}, RANK: {rank}, LOCAL_RANK: {local_rank}"
- )
- transform=transforms.Compose([
- transforms.ToTensor(),
- transforms.Normalize((0.1307,), (0.3081,))
- ])
- dataset1 = datasets.MNIST('/tmp/data', train=True, download=True,
- transform=transform)
- dataset2 = datasets.MNIST('/tmp/data', train=False,
- transform=transform)
- sampler1 = DistributedSampler(dataset1, rank=rank, num_replicas=world_size, shuffle=True)
- sampler2 = DistributedSampler(dataset2, rank=rank, num_replicas=world_size)
- train_kwargs = {'batch_size': parameters["batch-size"], 'sampler': sampler1}
- test_kwargs = {'batch_size': parameters["test-batch-size"], 'sampler': sampler2}
- cuda_kwargs = {'num_workers': 2,
- 'pin_memory': True,
- 'shuffle': False}
- train_kwargs.update(cuda_kwargs)
- test_kwargs.update(cuda_kwargs)
- train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)
- test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
- my_auto_wrap_policy = functools.partial(
- size_based_auto_wrap_policy, min_num_params=100
- )
- init_start_event = torch.cuda.Event(enable_timing=True)
- init_end_event = torch.cuda.Event(enable_timing=True)
- model = Net().to(local_rank)
- model = FSDP(model)
- optimizer = optim.Adadelta(model.parameters(), lr=parameters["lr"])
- scheduler = StepLR(optimizer, step_size=1, gamma=parameters["gamma"])
- init_start_event.record()
- for epoch in range(1, parameters["epochs"] + 1):
- train(parameters, model, local_rank, world_size, train_loader, optimizer, epoch, sampler=sampler1)
- test(model, local_rank, world_size, test_loader)
- scheduler.step()
- init_end_event.record()
- if rank == 0:
- init_end_event.synchronize()
- print(f"CUDA event elapsed time: {init_start_event.elapsed_time(init_end_event) / 1000}sec")
- print(f"{model}")
- if parameters["save-model"]:
- # use a barrier to make sure training is done on all ranks
- dist.barrier()
- states = model.state_dict()
- if rank == 0:
- torch.save(states, "mnist_cnn.pt")
- train_function({'batch-size': 64, 'test-batch-size': 1000, 'epochs': 10, 'lr': 1.0, 'gamma': 0.7, 'seed': 1, 'save-model': False})
- EOM
- printf "%s" "$SCRIPT" > "$program_path/ephemeral_script.py"
- torchrun "$program_path/ephemeral_script.py"
- command:
- - bash
- - -c
- image: docker.io/pytorch/pytorch:2.1.2-cuda11.8-cudnn8-runtime
- name: pytorch
- resources:
- limits:
- nvidia.com/gpu: "2"
- requests:
- nvidia.com/gpu: "2"
- runPolicy:
- suspend: false
diff --git a/setup.KubeConEU25/sample-jobs/short.yaml b/setup.KubeConEU25/sample-jobs/short.yaml
deleted file mode 100644
index bef54fd..0000000
--- a/setup.KubeConEU25/sample-jobs/short.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-apiVersion: workload.codeflare.dev/v1beta2
-kind: AppWrapper
-metadata:
- generateName: short
- labels:
- kueue.x-k8s.io/queue-name: default-queue
-spec:
- components:
- - template:
- apiVersion: batch/v1
- kind: Job
- metadata:
- generateName: short
- spec:
- completions: 2
- parallelism: 2
- template:
- spec:
- restartPolicy: Never
- terminationGracePeriodSeconds: 0
- containers:
- - name: busybox
- image: quay.io/project-codeflare/busybox:1.36
- command: ["sh", "-c", "sleep 30"]
- resources:
- limits:
- nvidia.com/gpu: 4
diff --git a/setup.RHOAI-v2.16/CLUSTER-SETUP.md b/setup.RHOAI-v2.16/CLUSTER-SETUP.md
deleted file mode 100644
index a4fcc0a..0000000
--- a/setup.RHOAI-v2.16/CLUSTER-SETUP.md
+++ /dev/null
@@ -1,171 +0,0 @@
-# Cluster Setup
-
-The cluster setup installs Red Hat OpenShift AI and configures Scheduler Plugins, Kueue,
-cluster roles, and priority classes.
-
-## Priorities
-
-Create `default-priority`, `high-priority`, and `low-priority` priority classes:
-```sh
-oc apply -f setup.RHOAI-v2.16/mlbatch-priorities.yaml
-```
-
-## Scheduler Configuration
-
-MLBatch configures Kubernetes scheduling to accomplish two objectives:
-+ Obtaining gang (all or nothing) scheduling for multi-Pod workloads.
-+ Packing Pods whose GPU request is less than the number of GPUs on a Node to
- maximize the number of Nodes available for Pods that request all the GPUs on a Node.
-
-This is done by installing the Coscheduling out-of-tree scheduler plugin and configuring
-the default NodeResourcesFit scheduler plugin to pack in the GPU dimension.
-
-
-```sh
-helm install scheduler-plugins --namespace scheduler-plugins --create-namespace \
- scheduler-plugins/manifests/install/charts/as-a-second-scheduler/ \
- --set-json pluginConfig='[{"args":{"scoringStrategy":{"resources":[{"name":"nvidia.com/gpu","weight":1}],"requestedToCapacityRatio":{"shape":[{"utilization":0,"score":0},{"utilization":100,"score":10}]},"type":"RequestedToCapacityRatio"}},"name":"NodeResourcesFit"},{"args":{"permitWaitingTimeSeconds":300},"name":"Coscheduling"}]'
-```
-Patch scheduler-plugins pod priorities:
-```sh
-oc patch deployment -n scheduler-plugins --type=json --patch-file setup.RHOAI-v2.16/scheduler-priority-patch.yaml scheduler-plugins-controller
-oc patch deployment -n scheduler-plugins --type=json --patch-file setup.RHOAI-v2.16/scheduler-priority-patch.yaml scheduler-plugins-scheduler
-```
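-
-Gang scheduling with Coscheduling is driven by `PodGroup` objects in the
-`scheduling.x-k8s.io` API group. In MLBatch these objects are normally created for you
-by the workload controllers configured below, so the following hand-written example
-(with an illustrative name and namespace) is only a sketch of what the scheduler acts on:
-
-```yaml
-apiVersion: scheduling.x-k8s.io/v1alpha1
-kind: PodGroup
-metadata:
-  name: example-job-pg    # illustrative name
-  namespace: team1        # illustrative namespace
-spec:
-  minMember: 2            # admit the group only if all 2 Pods can be placed together
-```
-
-Pods join a group through the `scheduling.x-k8s.io/pod-group: example-job-pg` label and
-must set `schedulerName: scheduler-plugins-scheduler`.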
-
-
-
-## Red Hat OpenShift AI
-
-Create the Red Hat OpenShift AI subscription:
-```sh
-oc apply -f setup.RHOAI-v2.16/mlbatch-subscription.yaml
-```
-Identify the install plan:
-```sh
-oc get ip -n redhat-ods-operator
-```
-```
-NAMESPACE NAME CSV APPROVAL APPROVED
-redhat-ods-operator install-kmh8w rhods-operator.2.16.0 Manual false
-```
-Approve the install plan, replacing the generated plan name below with the actual
-value:
-```sh
-oc patch ip -n redhat-ods-operator --type merge --patch '{"spec":{"approved":true}}' install-kmh8w
-```
-Create DSC Initialization:
-```sh
-oc apply -f setup.RHOAI-v2.16/mlbatch-dsci.yaml
-```
-Create Data Science Cluster:
-```sh
-oc apply -f setup.RHOAI-v2.16/mlbatch-dsc.yaml
-```
-The provided DSCI and DSC are intended to install a minimal set of Red Hat OpenShift
-AI managed components: `codeflare`, `kueue`, `ray`, and `trainingoperator`. The
-remaining components such as `dashboard` can be optionally enabled.
-
-The configuration of the managed components differs from the default Red Hat OpenShift
-AI configuration as follows:
-- Kubeflow Training Operator:
- - `gang-scheduler-name` is set to `scheduler-plugins-scheduler`,
-- Kueue:
- - `manageJobsWithoutQueueName` is enabled,
- - `batch/job` integration is disabled,
- - `waitForPodsReady` is disabled,
- - `LendingLimit` feature gate is enabled,
- - `fairSharing` is enabled,
- - `enableClusterQueueResources` metrics is enabled,
-- Codeflare operator:
- - the AppWrapper controller is enabled and configured as follows:
- - `userRBACAdmissionCheck` is disabled,
- - `schedulerName` is set to `scheduler-plugins-scheduler`,
- - `queueName` is set to `default-queue`,
- - `slackQueueName` is set to `slack-cluster-queue`
-- pod priorities, resource requests and limits have been adjusted.
-
-
-
-## Autopilot
-
-Helm chart values and customization instructions can be found [in the official documentation](https://github.com/IBM/autopilot/blob/main/helm-charts/autopilot/README.md). As-is, Autopilot will run on GPU nodes.
-
-- Add the Autopilot Helm repository
-
-```bash
-helm repo add autopilot https://ibm.github.io/autopilot/
-helm repo update
-```
-
-- Install the chart (the command is idempotent). The config file, which customizes the Helm values, is optional.
-
-```bash
-helm upgrade autopilot autopilot/autopilot --install --namespace=autopilot --create-namespace -f your-config.yml
-```
-
-### Enabling Prometheus metrics
-
-After completing the installation, manually label the namespace with the following command so that Prometheus can scrape its metrics:
-
-```bash
-oc label ns autopilot openshift.io/cluster-monitoring=true
-```
-
-The `ServiceMonitor` labeling is not required.
-
-## Kueue Configuration
-
-Create Kueue's default flavor:
-```sh
-oc apply -f setup.RHOAI-v2.16/default-flavor.yaml
-```
-
-## Cluster Role
-
-Create `mlbatch-edit` role:
-```sh
-oc apply -f setup.RHOAI-v2.16/mlbatch-edit-role.yaml
-```
-
-## Slack Cluster Queue
-
-Create the designated slack `ClusterQueue`, which will be used to automate
-minor adjustments to cluster capacity caused by node failures and
-scheduler maintenance.
-```sh
-oc apply -f- << EOF
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: ClusterQueue
-metadata:
- name: slack-cluster-queue
-spec:
- namespaceSelector: {}
- cohort: default-cohort
- preemption:
- withinClusterQueue: LowerOrNewerEqualPriority
- reclaimWithinCohort: Any
- borrowWithinCohort:
- policy: Never
- resourceGroups:
- - coveredResources: ["cpu", "memory", "nvidia.com/gpu", "nvidia.com/roce_gdr", "pods"]
- flavors:
- - name: default-flavor
- resources:
- - name: "cpu"
- nominalQuota: 8000m
- - name: "memory"
- nominalQuota: 128Gi
- - name: "nvidia.com/gpu"
- nominalQuota: 8
- - name: "nvidia.com/roce_gdr"
- nominalQuota: 1
- - name: "pods"
- nominalQuota: 100
-EOF
-```
-Edit the above quantities to adjust the quota to the desired
-values. Pod counts are optional and can be omitted from the list of
-covered resources. The `lendingLimit` for each resource will be
-dynamically adjusted by the MLBatch system to reflect reduced cluster
-capacity. See [QUOTA_MAINTENANCE.md](../QUOTA_MAINTENANCE.md) for a
-detailed discussion of the role of the slack `ClusterQueue`.
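-
-For illustration only, a dynamically adjusted GPU entry in the slack `ClusterQueue`
-might end up looking like the following sketch (the values are hypothetical):
-
-```yaml
-  - name: "nvidia.com/gpu"
-    nominalQuota: 8
-    lendingLimit: 6    # two GPUs are out of service, so only six are lent to the cohort
-```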
diff --git a/setup.RHOAI-v2.16/TEAM-SETUP.md b/setup.RHOAI-v2.16/TEAM-SETUP.md
deleted file mode 100644
index 85c9429..0000000
--- a/setup.RHOAI-v2.16/TEAM-SETUP.md
+++ /dev/null
@@ -1,91 +0,0 @@
-# Team Setup
-
-A *team* in MLBatch is a group of users that share a resource quota.
-
-Before setting up your teams and quotas, please read [QUOTA_MAINTENANCE.md](../QUOTA_MAINTENANCE.md)
-for a discussion of our recommended best practices.
-
-
-Setting up a new team requires the cluster admin to create a project,
-a user group, a quota, a queue, and the required role bindings as described below.
-
-Create project:
-```sh
-oc new-project team1
-```
-Create user group:
-```sh
-oc adm groups new team1-edit-group
-```
-Add users to the group, for example:
-```sh
-oc adm groups add-users team1-edit-group user1
-```
-Bind cluster role to group in namespace:
-```sh
-oc adm policy add-role-to-group mlbatch-edit team1-edit-group --role-namespace="" --namespace team1
-```
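-
-Optionally, spot-check the binding (`user1` is the example user added above):
-```sh
-oc auth can-i create pytorchjobs --as user1 -n team1
-```
-The command should print `yes`.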
-
-Specify the intended quota for the namespace by creating a `ClusterQueue`:
-```sh
-oc apply -f- << EOF
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: ClusterQueue
-metadata:
- name: team1-cluster-queue
-spec:
- namespaceSelector: {}
- cohort: default-cohort
- preemption:
- withinClusterQueue: LowerOrNewerEqualPriority
- reclaimWithinCohort: Any
- borrowWithinCohort:
- policy: Never
- resourceGroups:
- - coveredResources: ["cpu", "memory", "nvidia.com/gpu", "nvidia.com/roce_gdr", "pods"]
- flavors:
- - name: default-flavor
- resources:
- - name: "cpu"
- nominalQuota: 8000m
- # borrowingLimit: 0
- # lendingLimit: 0
- - name: "memory"
- nominalQuota: 128Gi
- # borrowingLimit: 0
- # lendingLimit: 0
- - name: "nvidia.com/gpu"
- nominalQuota: 16
- # borrowingLimit: 0
- # lendingLimit: 0
- - name: "nvidia.com/roce_gdr"
- nominalQuota: 4
- # borrowingLimit: 0
- # lendingLimit: 0
- - name: "pods"
- nominalQuota: 100
- # borrowingLimit: 0
- # lendingLimit: 0
-EOF
-```
-Edit the above quantities to adjust the quota to the desired values. Pod counts
-are optional and can be omitted from the list of covered resources.
-
-Uncomment all `borrowingLimit` lines to prevent this namespace from borrowing
-quota from other namespaces. Uncomment all `lendingLimit` lines to prevent other
-namespaces from borrowing quota from this namespace.
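-
-For example, a GPU entry that neither borrows nor lends would read:
-```yaml
-    - name: "nvidia.com/gpu"
-      nominalQuota: 16
-      borrowingLimit: 0
-      lendingLimit: 0
-```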
-
-Create a `LocalQueue` to bind the `ClusterQueue` to the namespace:
-```sh
-oc apply -n team1 -f- << EOF
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: LocalQueue
-metadata:
- name: default-queue
-spec:
- clusterQueue: team1-cluster-queue
-EOF
-```
-We recommend naming the local queue `default-queue` as `AppWrappers` will
-default to this queue name.
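-
-Workloads reference the queue through the `kueue.x-k8s.io/queue-name` label (the sample
-jobs in this repository set it explicitly), as in this minimal metadata sketch:
-```yaml
-metadata:
-  name: my-workload    # illustrative name
-  labels:
-    kueue.x-k8s.io/queue-name: default-queue
-```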
-
diff --git a/setup.RHOAI-v2.16/UNINSTALL.md b/setup.RHOAI-v2.16/UNINSTALL.md
deleted file mode 100644
index 776045d..0000000
--- a/setup.RHOAI-v2.16/UNINSTALL.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# Uninstall
-
-***First, remove all team projects and corresponding cluster queues.***
-
-Then to uninstall the MLBatch controllers and reclaim the corresponding
-namespaces, run:
-```sh
-# OpenShift AI uninstall
-oc delete dsc mlbatch-dsc
-oc delete dsci mlbatch-dsci
-oc delete subscription -n redhat-ods-operator rhods-operator
-oc delete csv -n redhat-ods-operator -l operators.coreos.com/rhods-operator.redhat-ods-operator
-oc delete crd featuretrackers.features.opendatahub.io \
- dscinitializations.dscinitialization.opendatahub.io \
- datascienceclusters.datasciencecluster.opendatahub.io
-oc delete operators rhods-operator.redhat-ods-operator
-oc delete operatorgroup -n redhat-ods-operator rhods-operator
-oc delete namespace redhat-ods-applications redhat-ods-monitoring redhat-ods-operator
-
-# Coscheduler uninstall
-helm uninstall -n scheduler-plugins scheduler-plugins
-oc delete namespace scheduler-plugins
-```
diff --git a/setup.RHOAI-v2.16/UPGRADE-FAST.md b/setup.RHOAI-v2.16/UPGRADE-FAST.md
deleted file mode 100644
index eeb9bb3..0000000
--- a/setup.RHOAI-v2.16/UPGRADE-FAST.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# Upgrading from RHOAI 2.15
-
-These instructions assume you installed and configured RHOAI 2.15 following
-the MLBatch [install instructions for RHOAI-v2.15](../setup.RHOAI-v2.15/CLUSTER-SETUP.md)
-or the [upgrade instructions for RHOAI-v2.15](../setup.RHOAI-v2.15/UPGRADE.md).
-
-Your subscription will have automatically created an unapproved
-install plan to upgrade to RHOAI 2.16.
-
-Before beginning, verify that the expected install plan exists:
-```sh
-oc get ip -n redhat-ods-operator
-```
-Typical output would be:
-```sh
-NAME CSV APPROVAL APPROVED
-install-kpzzl rhods-operator.2.16.0 Manual false
-install-nqrbp rhods-operator.2.15.0 Manual true
-```
-
-Assuming the install plan exists you can begin the upgrade process.
-
-First, update the MLBatch modifications to the default RHOAI configuration maps.
-```sh
-oc apply -f setup.RHOAI-v2.16/mlbatch-upgrade-configmaps.yaml
-```
-
-Second, approve the install plan, replacing the example plan name below with the
-actual value on your cluster:
-```sh
-oc patch ip -n redhat-ods-operator --type merge --patch '{"spec":{"approved":true}}' install-kpzzl
-```
diff --git a/setup.RHOAI-v2.16/UPGRADE-STABLE.md b/setup.RHOAI-v2.16/UPGRADE-STABLE.md
deleted file mode 100644
index e17651e..0000000
--- a/setup.RHOAI-v2.16/UPGRADE-STABLE.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# Upgrading from RHOAI 2.13
-
-These instructions assume you installed and configured RHOAI 2.13 following
-the MLBatch [install instructions for RHOAI-v2.13](../setup.RHOAI-v2.13/CLUSTER-SETUP.md).
-
-Your subscription will have automatically created an unapproved
-install plan to upgrade to RHOAI 2.16.
-
-Before beginning, verify that the expected install plan exists:
-```sh
-oc get ip -n redhat-ods-operator
-```
-Typical output would be:
-```sh
-NAME CSV APPROVAL APPROVED
-install-kpzzl rhods-operator.2.16.0 Manual false
-install-nqrbp rhods-operator.2.13.0 Manual true
-```
-
-Assuming the install plan exists you can begin the upgrade process.
-
-First, update the MLBatch modifications to the default RHOAI configuration maps.
-```sh
-oc apply -f setup.RHOAI-v2.16/mlbatch-upgrade-configmaps.yaml
-```
-
-Second, approve the install plan replacing the example plan name below with the actual
-value on your cluster:
-```sh
-oc patch ip -n redhat-ods-operator --type merge --patch '{"spec":{"approved":true}}' install-kpzzl
-```
diff --git a/setup.RHOAI-v2.16/default-flavor.yaml b/setup.RHOAI-v2.16/default-flavor.yaml
deleted file mode 100644
index 6cbccf3..0000000
--- a/setup.RHOAI-v2.16/default-flavor.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: ResourceFlavor
-metadata:
- name: default-flavor
diff --git a/setup.RHOAI-v2.16/mlbatch-dsc.yaml b/setup.RHOAI-v2.16/mlbatch-dsc.yaml
deleted file mode 100644
index 66336bc..0000000
--- a/setup.RHOAI-v2.16/mlbatch-dsc.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-apiVersion: datasciencecluster.opendatahub.io/v1
-kind: DataScienceCluster
-metadata:
- name: mlbatch-dsc
-spec:
- components:
- codeflare:
- managementState: Managed
- dashboard:
- managementState: Removed
- datasciencepipelines:
- managementState: Removed
- kserve:
- managementState: Removed
- serving:
- ingressGateway:
- certificate:
- type: SelfSigned
- managementState: Removed
- name: knative-serving
- kueue:
- managementState: Managed
- modelmeshserving:
- managementState: Removed
- ray:
- managementState: Managed
- trainingoperator:
- managementState: Managed
- trustyai:
- managementState: Removed
- workbenches:
- managementState: Removed
diff --git a/setup.RHOAI-v2.16/mlbatch-dsci.yaml b/setup.RHOAI-v2.16/mlbatch-dsci.yaml
deleted file mode 100644
index 77785c3..0000000
--- a/setup.RHOAI-v2.16/mlbatch-dsci.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-apiVersion: dscinitialization.opendatahub.io/v1
-kind: DSCInitialization
-metadata:
- name: mlbatch-dsci
-spec:
- applicationsNamespace: redhat-ods-applications
- monitoring:
- managementState: Managed
- namespace: redhat-ods-monitoring
- serviceMesh:
- managementState: Removed
- trustedCABundle:
- customCABundle: ""
- managementState: Managed
diff --git a/setup.RHOAI-v2.16/mlbatch-edit-role.yaml b/setup.RHOAI-v2.16/mlbatch-edit-role.yaml
deleted file mode 100644
index fd86cc6..0000000
--- a/setup.RHOAI-v2.16/mlbatch-edit-role.yaml
+++ /dev/null
@@ -1,151 +0,0 @@
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
- name: mlbatch-edit
-rules:
-- apiGroups:
- - ""
- resources:
- - pods
- verbs:
- - delete
- - get
- - list
- - watch
-- apiGroups:
- - apps
- resources:
- - deployments
- - statefulsets
- verbs:
- - delete
- - get
- - list
- - watch
-- apiGroups:
- - ""
- resources:
- - services
- - secrets
- - configmaps
- - persistentvolumeclaims
- verbs:
- - create
- - delete
- - get
- - list
- - patch
- - update
- - watch
-- apiGroups:
- - kueue.x-k8s.io
- resources:
- - "*"
- verbs:
- - get
- - list
- - watch
-- apiGroups:
- - kubeflow.org
- resources:
- - pytorchjobs
- verbs:
- - create
- - delete
- - get
- - list
- - patch
- - update
- - watch
-- apiGroups:
- - ray.io
- resources:
- - rayjobs
- - rayclusters
- verbs:
- - create
- - delete
- - get
- - list
- - patch
- - update
- - watch
-- apiGroups:
- - batch
- resources:
- - jobs
- verbs:
- - delete
- - get
- - list
- - watch
-- apiGroups:
- - workload.codeflare.dev
- resources:
- - appwrappers
- verbs:
- - create
- - delete
- - get
- - list
- - patch
- - update
- - watch
-- apiGroups:
- - scheduling.k8s.io
- resources:
- - priorityclasses
- verbs:
- - get
- - list
- - watch
-- apiGroups:
- - scheduling.x-k8s.io
- resources:
- - podgroups
- verbs:
- - create
- - delete
- - get
- - list
- - patch
- - update
- - watch
-- apiGroups:
- - ""
- resources:
- - events
- verbs:
- - get
- - list
- - watch
-- apiGroups:
- - ""
- resources:
- - namespaces
- - pods/log
- verbs:
- - get
-- apiGroups:
- - ""
- resources:
- - pods/exec
- - pods/portforward
- verbs:
- - create
-- apiGroups:
- - route.openshift.io
- resources:
- - routes
- verbs:
- - get
- - list
- - watch
- - delete
-- apiGroups:
- - ""
- - project.openshift.io
- resources:
- - projects
- verbs:
- - get
diff --git a/setup.RHOAI-v2.16/mlbatch-priorities.yaml b/setup.RHOAI-v2.16/mlbatch-priorities.yaml
deleted file mode 100644
index 77c8f3b..0000000
--- a/setup.RHOAI-v2.16/mlbatch-priorities.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-apiVersion: scheduling.k8s.io/v1
-kind: PriorityClass
-metadata:
- name: low-priority
-value: 1
-preemptionPolicy: PreemptLowerPriority
-globalDefault: false
-description: "This is the priority class for all lower priority jobs."
----
-apiVersion: scheduling.k8s.io/v1
-kind: PriorityClass
-metadata:
- name: default-priority
-value: 5
-preemptionPolicy: PreemptLowerPriority
-globalDefault: true
-description: "This is the priority class for all jobs (default priority)."
----
-apiVersion: scheduling.k8s.io/v1
-kind: PriorityClass
-metadata:
- name: high-priority
-value: 10
-preemptionPolicy: PreemptLowerPriority
-globalDefault: false
-description: "This is the priority class defined for highly important jobs that would evict lower and default priority jobs."
diff --git a/setup.RHOAI-v2.16/mlbatch-subscription.yaml b/setup.RHOAI-v2.16/mlbatch-subscription.yaml
deleted file mode 100644
index 0f5eec1..0000000
--- a/setup.RHOAI-v2.16/mlbatch-subscription.yaml
+++ /dev/null
@@ -1,310 +0,0 @@
-apiVersion: v1
-kind: Namespace
-metadata:
- name: redhat-ods-operator
----
-apiVersion: v1
-kind: Namespace
-metadata:
- name: redhat-ods-applications
----
-apiVersion: operators.coreos.com/v1
-kind: OperatorGroup
-metadata:
- name: rhods-operator
- namespace: redhat-ods-operator
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
- name: mlbatch-codeflare
- namespace: redhat-ods-operator
-data:
- manager.yaml: |
- apiVersion: apps/v1
- kind: Deployment
- metadata:
- name: manager
- namespace: system
- spec:
- selector:
- matchLabels:
- app.kubernetes.io/name: codeflare-operator
- app.kubernetes.io/part-of: codeflare
- replicas: 1
- template:
- metadata:
- annotations:
- kubectl.kubernetes.io/default-container: manager
- labels:
- app.kubernetes.io/name: codeflare-operator
- app.kubernetes.io/part-of: codeflare
- spec:
- priorityClassName: system-node-critical
- securityContext:
- runAsNonRoot: true
- # TODO(user): For common cases that do not require escalating privileges
- # it is recommended to ensure that all your Pods/Containers are restrictive.
- # More info: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted
- # Please uncomment the following code if your project does NOT have to work on old Kubernetes
- # versions < 1.20 or on vendors versions which do NOT support this field by default (i.e. Openshift < 4.11 ).
- # seccompProfile:
- # type: RuntimeDefault
- containers:
- - command:
- - /manager
- image: $(codeflare_operator_controller_image)
- imagePullPolicy: Always
- name: manager
- securityContext:
- allowPrivilegeEscalation: false
- capabilities:
- drop:
- - "ALL"
- env:
- - name: NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: metadata.namespace
- ports:
- - containerPort: 8080
- protocol: TCP
- name: metrics
- livenessProbe:
- httpGet:
- path: /healthz
- port: 8081
- initialDelaySeconds: 15
- periodSeconds: 20
- readinessProbe:
- httpGet:
- path: /readyz
- port: 8081
- initialDelaySeconds: 5
- periodSeconds: 10
- resources:
- limits:
- cpu: "1"
- memory: 1Gi
- requests:
- cpu: "1"
- memory: 1Gi
- serviceAccountName: controller-manager
- terminationGracePeriodSeconds: 10
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
- name: codeflare-operator-config
- namespace: redhat-ods-applications
-data:
- config.yaml: |
- appwrapper:
- enabled: true
- Config:
- autopilot:
- injectAntiAffinities: true
- monitorNodes: true
- resourceTaints:
- nvidia.com/gpu:
- - key: autopilot.ibm.com/gpuhealth
- value: ERR
- effect: NoSchedule
- - key: autopilot.ibm.com/gpuhealth
- value: TESTING
- effect: NoSchedule
- - key: autopilot.ibm.com/gpuhealth
- value: EVICT
- effect: NoExecute
- defaultQueueName: default-queue
- enableKueueIntegrations: true
- kueueJobReconciller:
- manageJobsWithoutQueueName: true
- waitForPodsReady:
- blockAdmission: false
- enable: false
- schedulerName: scheduler-plugins-scheduler
- slackQueueName: slack-cluster-queue
- userRBACAdmissionCheck: false
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
- name: mlbatch-kuberay
- namespace: redhat-ods-operator
-data:
- kuberay-operator-image-patch.yaml: |
- apiVersion: apps/v1
- kind: Deployment
- metadata:
- name: kuberay-operator
- spec:
- template:
- spec:
- priorityClassName: system-node-critical
- containers:
- - name: kuberay-operator
- image: $(image)
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
- name: mlbatch-kueue
- namespace: redhat-ods-operator
-data:
- controller_manager_config.yaml: |
- apiVersion: config.kueue.x-k8s.io/v1beta1
- kind: Configuration
- health:
- healthProbeBindAddress: :8081
- metrics:
- bindAddress: :8080
- enableClusterQueueResources: true
- webhook:
- port: 9443
- leaderElection:
- leaderElect: true
- resourceName: c1f6bfd2.kueue.x-k8s.io
- controller:
- groupKindConcurrency:
- Job.batch: 5
- Pod: 5
- Workload.kueue.x-k8s.io: 5
- LocalQueue.kueue.x-k8s.io: 1
- ClusterQueue.kueue.x-k8s.io: 1
- ResourceFlavor.kueue.x-k8s.io: 1
- clientConnection:
- qps: 50
- burst: 100
- #pprofBindAddress: :8082
- waitForPodsReady:
- enable: false
- blockAdmission: false
- manageJobsWithoutQueueName: true
- #internalCertManagement:
- # enable: false
- # webhookServiceName: ""
- # webhookSecretName: ""
- integrations:
- frameworks:
- # - "batch/job"
- - "kubeflow.org/mpijob"
- - "ray.io/rayjob"
- - "ray.io/raycluster"
- - "jobset.x-k8s.io/jobset"
- - "kubeflow.org/mxjob"
- - "kubeflow.org/paddlejob"
- - "kubeflow.org/pytorchjob"
- - "kubeflow.org/tfjob"
- - "kubeflow.org/xgboostjob"
- # - "pod"
- externalFrameworks:
- - "AppWrapper.v1beta2.workload.codeflare.dev"
- # podOptions:
- # namespaceSelector:
- # matchExpressions:
- # - key: kubernetes.io/metadata.name
- # operator: NotIn
- # values: [ kube-system, kueue-system ]
- fairSharing:
- enable: true
- preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare]
- manager_config_patch.yaml: |
- apiVersion: apps/v1
- kind: Deployment
- metadata:
- name: controller-manager
- namespace: system
- spec:
- template:
- spec:
- priorityClassName: system-node-critical
- containers:
- - name: manager
- image: $(image)
- args:
- - "--config=/controller_manager_config.yaml"
- - "--zap-log-level=2"
- - "--feature-gates=LendingLimit=true"
- volumeMounts:
- - name: manager-config
- mountPath: /controller_manager_config.yaml
- subPath: controller_manager_config.yaml
- volumes:
- - name: manager-config
- configMap:
- name: manager-config
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
- name: mlbatch-training-operator
- namespace: redhat-ods-operator
-data:
- manager_config_patch.yaml: |
- apiVersion: apps/v1
- kind: Deployment
- metadata:
- name: training-operator
- spec:
- template:
- spec:
- priorityClassName: system-node-critical
- containers:
- - name: training-operator
- image: $(image)
- args:
- - "--zap-log-level=2"
- - "--gang-scheduler-name=scheduler-plugins-scheduler"
- resources:
- requests:
- cpu: 100m
- memory: 100Mi
- limits:
- cpu: 500m
- memory: 1000Mi
----
-apiVersion: operators.coreos.com/v1alpha1
-kind: Subscription
-metadata:
- name: rhods-operator
- namespace: redhat-ods-operator
-spec:
- channel: stable
- installPlanApproval: Manual
- name: rhods-operator
- source: redhat-operators
- sourceNamespace: openshift-marketplace
- startingCSV: rhods-operator.2.16.0
- config:
- env:
- - name: "DISABLE_DSC_CONFIG"
- volumeMounts:
- - name: mlbatch-codeflare
- mountPath: /opt/manifests/codeflare/manager/manager.yaml
- subPath: manager.yaml
- - name: mlbatch-kuberay
- mountPath: /opt/manifests/ray/openshift/kuberay-operator-image-patch.yaml
- subPath: kuberay-operator-image-patch.yaml
- - name: mlbatch-kueue
- mountPath: /opt/manifests/kueue/components/manager/controller_manager_config.yaml
- subPath: controller_manager_config.yaml
- - name: mlbatch-kueue
- mountPath: /opt/manifests/kueue/rhoai/manager_config_patch.yaml
- subPath: manager_config_patch.yaml
- - name: mlbatch-training-operator
- mountPath: /opt/manifests/trainingoperator/rhoai/manager_config_patch.yaml
- subPath: manager_config_patch.yaml
- volumes:
- - name: mlbatch-codeflare
- configMap:
- name: mlbatch-codeflare
- - name: mlbatch-kuberay
- configMap:
- name: mlbatch-kuberay
- - name: mlbatch-kueue
- configMap:
- name: mlbatch-kueue
- - name: mlbatch-training-operator
- configMap:
- name: mlbatch-training-operator
diff --git a/setup.RHOAI-v2.16/mlbatch-upgrade-configmaps.yaml b/setup.RHOAI-v2.16/mlbatch-upgrade-configmaps.yaml
deleted file mode 100644
index c111aa4..0000000
--- a/setup.RHOAI-v2.16/mlbatch-upgrade-configmaps.yaml
+++ /dev/null
@@ -1,125 +0,0 @@
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
- name: codeflare-operator-config
- namespace: redhat-ods-applications
-data:
- config.yaml: |
- appwrapper:
- enabled: true
- Config:
- autopilot:
- injectAntiAffinities: true
- monitorNodes: true
- resourceTaints:
- nvidia.com/gpu:
- - key: autopilot.ibm.com/gpuhealth
- value: ERR
- effect: NoSchedule
- - key: autopilot.ibm.com/gpuhealth
- value: TESTING
- effect: NoSchedule
- - key: autopilot.ibm.com/gpuhealth
- value: EVICT
- effect: NoExecute
- defaultQueueName: default-queue
- enableKueueIntegrations: true
- kueueJobReconciller:
- manageJobsWithoutQueueName: true
- waitForPodsReady:
- blockAdmission: false
- enable: false
- schedulerName: scheduler-plugins-scheduler
- slackQueueName: slack-cluster-queue
- userRBACAdmissionCheck: false
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
- name: mlbatch-kueue
- namespace: redhat-ods-operator
-data:
- controller_manager_config.yaml: |
- apiVersion: config.kueue.x-k8s.io/v1beta1
- kind: Configuration
- health:
- healthProbeBindAddress: :8081
- metrics:
- bindAddress: :8080
- enableClusterQueueResources: true
- webhook:
- port: 9443
- leaderElection:
- leaderElect: true
- resourceName: c1f6bfd2.kueue.x-k8s.io
- controller:
- groupKindConcurrency:
- Job.batch: 5
- Pod: 5
- Workload.kueue.x-k8s.io: 5
- LocalQueue.kueue.x-k8s.io: 1
- ClusterQueue.kueue.x-k8s.io: 1
- ResourceFlavor.kueue.x-k8s.io: 1
- clientConnection:
- qps: 50
- burst: 100
- #pprofBindAddress: :8082
- waitForPodsReady:
- enable: false
- blockAdmission: false
- manageJobsWithoutQueueName: true
- #internalCertManagement:
- # enable: false
- # webhookServiceName: ""
- # webhookSecretName: ""
- integrations:
- frameworks:
- # - "batch/job"
- - "kubeflow.org/mpijob"
- - "ray.io/rayjob"
- - "ray.io/raycluster"
- - "jobset.x-k8s.io/jobset"
- - "kubeflow.org/mxjob"
- - "kubeflow.org/paddlejob"
- - "kubeflow.org/pytorchjob"
- - "kubeflow.org/tfjob"
- - "kubeflow.org/xgboostjob"
- # - "pod"
- externalFrameworks:
- - "AppWrapper.v1beta2.workload.codeflare.dev"
- # podOptions:
- # namespaceSelector:
- # matchExpressions:
- # - key: kubernetes.io/metadata.name
- # operator: NotIn
- # values: [ kube-system, kueue-system ]
- fairSharing:
- enable: true
- preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare]
- manager_config_patch.yaml: |
- apiVersion: apps/v1
- kind: Deployment
- metadata:
- name: controller-manager
- namespace: system
- spec:
- template:
- spec:
- priorityClassName: system-node-critical
- containers:
- - name: manager
- image: $(image)
- args:
- - "--config=/controller_manager_config.yaml"
- - "--zap-log-level=2"
- - "--feature-gates=LendingLimit=true"
- volumeMounts:
- - name: manager-config
- mountPath: /controller_manager_config.yaml
- subPath: controller_manager_config.yaml
- volumes:
- - name: manager-config
- configMap:
- name: manager-config
----
diff --git a/setup.RHOAI-v2.16/scheduler-priority-patch.yaml b/setup.RHOAI-v2.16/scheduler-priority-patch.yaml
deleted file mode 100644
index 278802f..0000000
--- a/setup.RHOAI-v2.16/scheduler-priority-patch.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-- op: add
- path: /spec/template/spec/priorityClassName
- value: system-node-critical
diff --git a/setup.RHOAI-v2.19/CLUSTER-SETUP.md b/setup.RHOAI-v2.19/CLUSTER-SETUP.md
deleted file mode 100644
index 87046a6..0000000
--- a/setup.RHOAI-v2.19/CLUSTER-SETUP.md
+++ /dev/null
@@ -1,171 +0,0 @@
-# Cluster Setup
-
-The cluster setup installs Red Hat OpenShift AI and configures Scheduler Plugins, Kueue,
-cluster roles, and priority classes.
-
-## Priorities
-
-Create `default-priority`, `high-priority`, and `low-priority` priority classes:
-```sh
-oc apply -f setup.RHOAI-v2.19/mlbatch-priorities.yaml
-```
-
-## Scheduler Configuration
-
-MLBatch configures Kubernetes scheduling to accomplish two objectives:
-+ Obtaining gang (all or nothing) scheduling for multi-Pod workloads.
-+ Packing Pods whose GPU request is less than the number of GPUs on a Node to
- maximize the number of Nodes available for Pods that request all the GPUs on a Node.
-
-This is done by installing the Coscheduling out-of-tree scheduler plugin and configuring
-the default NodeResourcesFit scheduler plugin to pack in the GPU dimension.
-
-
-```sh
-helm install scheduler-plugins --namespace scheduler-plugins --create-namespace \
- scheduler-plugins/manifests/install/charts/as-a-second-scheduler/ \
- --set-json pluginConfig='[{"args":{"scoringStrategy":{"resources":[{"name":"nvidia.com/gpu","weight":1}],"requestedToCapacityRatio":{"shape":[{"utilization":0,"score":0},{"utilization":100,"score":10}]},"type":"RequestedToCapacityRatio"}},"name":"NodeResourcesFit"},{"args":{"permitWaitingTimeSeconds":300},"name":"Coscheduling"}]'
-```
-Patch scheduler-plugins pod priorities:
-```sh
-oc patch deployment -n scheduler-plugins --type=json --patch-file setup.RHOAI-v2.19/scheduler-priority-patch.yaml scheduler-plugins-controller
-oc patch deployment -n scheduler-plugins --type=json --patch-file setup.RHOAI-v2.19/scheduler-priority-patch.yaml scheduler-plugins-scheduler
-```
-
-
-
-## Red Hat OpenShift AI
-
-Create the Red Hat OpenShift AI subscription:
-```sh
-oc apply -f setup.RHOAI-v2.19/mlbatch-subscription.yaml
-```
-Identify the install plan:
-```sh
-oc get ip -n redhat-ods-operator
-```
-```
-NAMESPACE NAME CSV APPROVAL APPROVED
-redhat-ods-operator   install-kmh8w   rhods-operator.2.19.0   Manual     false
-```
-Approve the install plan, replacing the generated plan name below with the actual
-value:
-```sh
-oc patch ip -n redhat-ods-operator --type merge --patch '{"spec":{"approved":true}}' install-kmh8w
-```
-Create DSC Initialization:
-```sh
-oc apply -f setup.RHOAI-v2.19/mlbatch-dsci.yaml
-```
-Create Data Science Cluster:
-```sh
-oc apply -f setup.RHOAI-v2.19/mlbatch-dsc.yaml
-```
-The provided DSCI and DSC are intended to install a minimal set of Red Hat OpenShift
-AI managed components: `codeflare`, `kueue`, `ray`, and `trainingoperator`. The
-remaining components such as `dashboard` can be optionally enabled.
-
-The configuration of the managed components differs from the default Red Hat OpenShift
-AI configuration as follows:
-- Kubeflow Training Operator:
- - `gang-scheduler-name` is set to `scheduler-plugins-scheduler`,
-- Kueue:
- - `manageJobsWithoutQueueName` is enabled,
- - `batch/job` integration is disabled,
- - `waitForPodsReady` is disabled,
- - `LendingLimit` feature gate is enabled,
- - `fairSharing` is enabled,
- - `enableClusterQueueResources` metrics is enabled,
-- Codeflare operator:
- - the AppWrapper controller is enabled and configured as follows:
- - `userRBACAdmissionCheck` is disabled,
- - `schedulerName` is set to `scheduler-plugins-scheduler`,
- - `queueName` is set to `default-queue`,
- - `slackQueueName` is set to `slack-cluster-queue`
-- pod priorities, resource requests and limits have been adjusted.
-
-
-
-## Autopilot
-
-Helm chart values and customization instructions can be found [in the official documentation](https://github.com/IBM/autopilot/blob/main/helm-charts/autopilot/README.md). As-is, Autopilot will run on GPU nodes.
-
-- Add the Autopilot Helm repository
-
-```bash
-helm repo add autopilot https://ibm.github.io/autopilot/
-helm repo update
-```
-
-- Install the chart (the command is idempotent). The config file, which customizes the Helm values, is optional.
-
-```bash
-helm upgrade autopilot autopilot/autopilot --install --namespace=autopilot --create-namespace -f your-config.yml
-```
-
-### Enabling Prometheus metrics
-
-After completing the installation, manually label the namespace with the following command so that Prometheus can scrape its metrics:
-
-```bash
-oc label ns autopilot openshift.io/cluster-monitoring=true
-```
-
-The `ServiceMonitor` labeling is not required.
-
-## Kueue Configuration
-
-Create Kueue's default flavor:
-```sh
-oc apply -f setup.RHOAI-v2.19/default-flavor.yaml
-```
-
-## Cluster Role
-
-Create `mlbatch-edit` role:
-```sh
-oc apply -f setup.RHOAI-v2.19/mlbatch-edit-role.yaml
-```
-
-## Slack Cluster Queue
-
-Create the designated slack `ClusterQueue`, which will be used to automate
-minor adjustments to cluster capacity caused by node failures and
-scheduler maintenance.
-```sh
-oc apply -f- << EOF
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: ClusterQueue
-metadata:
- name: slack-cluster-queue
-spec:
- namespaceSelector: {}
- cohort: default-cohort
- preemption:
- withinClusterQueue: LowerOrNewerEqualPriority
- reclaimWithinCohort: Any
- borrowWithinCohort:
- policy: Never
- resourceGroups:
- - coveredResources: ["cpu", "memory", "nvidia.com/gpu", "nvidia.com/roce_gdr", "pods"]
- flavors:
- - name: default-flavor
- resources:
- - name: "cpu"
- nominalQuota: 8000m
- - name: "memory"
- nominalQuota: 128Gi
- - name: "nvidia.com/gpu"
- nominalQuota: 8
- - name: "nvidia.com/roce_gdr"
- nominalQuota: 1
- - name: "pods"
- nominalQuota: 100
-EOF
-```
-Edit the above quantities to adjust the quota to the desired
-values. Pod counts are optional and can be omitted from the list of
-covered resources. The `lendingLimit` for each resource will be
-dynamically adjusted by the MLBatch system to reflect reduced cluster
-capacity. See [QUOTA_MAINTENANCE.md](../QUOTA_MAINTENANCE.md) for a
-detailed discussion of the role of the slack `ClusterQueue`.
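-
-You can inspect the quota, and any `lendingLimit` adjustments made by MLBatch, at any
-time with:
-```sh
-oc get clusterqueue slack-cluster-queue -o yaml
-```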
diff --git a/setup.RHOAI-v2.19/TEAM-SETUP.md b/setup.RHOAI-v2.19/TEAM-SETUP.md
deleted file mode 100644
index 85c9429..0000000
--- a/setup.RHOAI-v2.19/TEAM-SETUP.md
+++ /dev/null
@@ -1,91 +0,0 @@
-# Team Setup
-
-A *team* in MLBatch is a group of users that share a resource quota.
-
-Before setting up your teams and quotas, please read [QUOTA_MAINTENANCE.md](../QUOTA_MAINTENANCE.md)
-for a discussion of our recommended best practices.
-
-
-Setting up a new team requires the cluster admin to create a project,
-a user group, a quota, a queue, and the required role bindings as described below.
-
-Create project:
-```sh
-oc new-project team1
-```
-Create user group:
-```sh
-oc adm groups new team1-edit-group
-```
-Add users to the group, for example:
-```sh
-oc adm groups add-users team1-edit-group user1
-```
-Bind cluster role to group in namespace:
-```sh
-oc adm policy add-role-to-group mlbatch-edit team1-edit-group --role-namespace="" --namespace team1
-```
-
-Specify the intended quota for the namespace by creating a `ClusterQueue`:
-```sh
-oc apply -f- << EOF
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: ClusterQueue
-metadata:
- name: team1-cluster-queue
-spec:
- namespaceSelector: {}
- cohort: default-cohort
- preemption:
- withinClusterQueue: LowerOrNewerEqualPriority
- reclaimWithinCohort: Any
- borrowWithinCohort:
- policy: Never
- resourceGroups:
- - coveredResources: ["cpu", "memory", "nvidia.com/gpu", "nvidia.com/roce_gdr", "pods"]
- flavors:
- - name: default-flavor
- resources:
- - name: "cpu"
- nominalQuota: 8000m
- # borrowingLimit: 0
- # lendingLimit: 0
- - name: "memory"
- nominalQuota: 128Gi
- # borrowingLimit: 0
- # lendingLimit: 0
- - name: "nvidia.com/gpu"
- nominalQuota: 16
- # borrowingLimit: 0
- # lendingLimit: 0
- - name: "nvidia.com/roce_gdr"
- nominalQuota: 4
- # borrowingLimit: 0
- # lendingLimit: 0
- - name: "pods"
- nominalQuota: 100
- # borrowingLimit: 0
- # lendingLimit: 0
-EOF
-```
-Edit the above quantities to adjust the quota to the desired values. Pod counts
-are optional and can be omitted from the list of covered resources.
-
-Uncomment all `borrowingLimit` lines to prevent this namespace from borrowing
-quota from other namespaces. Uncomment all `lendingLimit` lines to prevent other
-namespaces from borrowing quota from this namespace.
-
-Create a `LocalQueue` to bind the `ClusterQueue` to the namespace:
-```sh
-oc apply -n team1 -f- << EOF
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: LocalQueue
-metadata:
- name: default-queue
-spec:
- clusterQueue: team1-cluster-queue
-EOF
-```
-We recommend naming the local queue `default-queue` as `AppWrappers` will
-default to this queue name.
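-
-To verify that the namespace is bound to the cluster queue:
-```sh
-oc get localqueue -n team1
-```
-The output should list `default-queue` with `team1-cluster-queue` as its cluster queue.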
-
diff --git a/setup.RHOAI-v2.19/UNINSTALL.md b/setup.RHOAI-v2.19/UNINSTALL.md
deleted file mode 100644
index 776045d..0000000
--- a/setup.RHOAI-v2.19/UNINSTALL.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# Uninstall
-
-***First, remove all team projects and corresponding cluster queues.***
-
-Then to uninstall the MLBatch controllers and reclaim the corresponding
-namespaces, run:
-```sh
-# OpenShift AI uninstall
-oc delete dsc mlbatch-dsc
-oc delete dsci mlbatch-dsci
-oc delete subscription -n redhat-ods-operator rhods-operator
-oc delete csv -n redhat-ods-operator -l operators.coreos.com/rhods-operator.redhat-ods-operator
-oc delete crd featuretrackers.features.opendatahub.io \
- dscinitializations.dscinitialization.opendatahub.io \
- datascienceclusters.datasciencecluster.opendatahub.io
-oc delete operators rhods-operator.redhat-ods-operator
-oc delete operatorgroup -n redhat-ods-operator rhods-operator
-oc delete namespace redhat-ods-applications redhat-ods-monitoring redhat-ods-operator
-
-# Coscheduler uninstall
-helm uninstall -n scheduler-plugins scheduler-plugins
-oc delete namespace scheduler-plugins
-```
diff --git a/setup.RHOAI-v2.19/UPGRADE-FAST.md b/setup.RHOAI-v2.19/UPGRADE-FAST.md
deleted file mode 100644
index 06db6ab..0000000
--- a/setup.RHOAI-v2.19/UPGRADE-FAST.md
+++ /dev/null
@@ -1,29 +0,0 @@
-# Upgrading from RHOAI 2.18
-
-These instructions assume you installed and configured RHOAI 2.18 following
-the MLBatch [install instructions for RHOAI-v2.18](../setup.RHOAI-v2.18/CLUSTER-SETUP.md)
-or the [upgrade instructions for RHOAI-v2.18](../setup.RHOAI-v2.18/UPGRADE.md).
-
-Your subscription will have automatically created an unapproved
-install plan to upgrade to RHOAI 2.19.
-
-Before beginning, verify that the expected install plan exists:
-```sh
-oc get ip -n redhat-ods-operator
-```
-Typical output would be:
-```sh
-NAME CSV APPROVAL APPROVED
-install-kpzzl   rhods-operator.2.19.0   Manual     false
-install-nqrbp   rhods-operator.2.18.0   Manual     true
-```
-
-Assuming the install plan exists you can begin the upgrade process.
-
-There are no MLBatch modifications to the default RHOAI configuration maps
-beyond those already made in previous installs. Therefore, you can simply
-approve the install plan replacing the example plan name below with the actual
-value on your cluster:
-```sh
-oc patch ip -n redhat-ods-operator --type merge --patch '{"spec":{"approved":true}}' install-kpzzl
-```
diff --git a/setup.RHOAI-v2.19/UPGRADE-STABLE.md b/setup.RHOAI-v2.19/UPGRADE-STABLE.md
deleted file mode 100644
index 10a4cf5..0000000
--- a/setup.RHOAI-v2.19/UPGRADE-STABLE.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# Upgrading from RHOAI 2.16
-
-These instructions assume you installed and configured RHOAI 2.16 following
-the MLBatch [install instructions for RHOAI-v2.16](../setup.RHOAI-v2.16/CLUSTER-SETUP.md)
-or the [stable stream upgrade instructions for RHOAI-V2.16](../setup.RHOAI-v2.16/UPGRADE-STABLE.md)
-and are subscribed to the stable channel.
-
-Your subscription will have automatically created an unapproved
-install plan to upgrade to RHOAI 2.19.
-
-Before beginning, verify that the expected install plan exists:
-```sh
-oc get ip -n redhat-ods-operator
-```
-Typical output would be:
-```sh
-NAME            CSV                     APPROVAL   APPROVED
-install-kpzzl   rhods-operator.2.16.0   Manual     true
-install-nqrbp   rhods-operator.2.19.0   Manual     false
-```
-
-Assuming the install plan exists you can begin the upgrade process.
-
-There are no MLBatch modifications to the default RHOAI configuration maps
-beyond those already made in previous installs. Therefore, you can simply
-approve the unapproved install plan, replacing the example plan name below with
-the actual value on your cluster:
-```sh
-oc patch ip -n redhat-ods-operator --type merge --patch '{"spec":{"approved":true}}' install-nqrbp
-```
diff --git a/setup.RHOAI-v2.19/default-flavor.yaml b/setup.RHOAI-v2.19/default-flavor.yaml
deleted file mode 100644
index 6cbccf3..0000000
--- a/setup.RHOAI-v2.19/default-flavor.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: ResourceFlavor
-metadata:
- name: default-flavor
diff --git a/setup.RHOAI-v2.19/mlbatch-dsc.yaml b/setup.RHOAI-v2.19/mlbatch-dsc.yaml
deleted file mode 100644
index 66336bc..0000000
--- a/setup.RHOAI-v2.19/mlbatch-dsc.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-apiVersion: datasciencecluster.opendatahub.io/v1
-kind: DataScienceCluster
-metadata:
- name: mlbatch-dsc
-spec:
- components:
- codeflare:
- managementState: Managed
- dashboard:
- managementState: Removed
- datasciencepipelines:
- managementState: Removed
- kserve:
- managementState: Removed
- serving:
- ingressGateway:
- certificate:
- type: SelfSigned
- managementState: Removed
- name: knative-serving
- kueue:
- managementState: Managed
- modelmeshserving:
- managementState: Removed
- ray:
- managementState: Managed
- trainingoperator:
- managementState: Managed
- trustyai:
- managementState: Removed
- workbenches:
- managementState: Removed
diff --git a/setup.RHOAI-v2.19/mlbatch-dsci.yaml b/setup.RHOAI-v2.19/mlbatch-dsci.yaml
deleted file mode 100644
index 77785c3..0000000
--- a/setup.RHOAI-v2.19/mlbatch-dsci.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-apiVersion: dscinitialization.opendatahub.io/v1
-kind: DSCInitialization
-metadata:
- name: mlbatch-dsci
-spec:
- applicationsNamespace: redhat-ods-applications
- monitoring:
- managementState: Managed
- namespace: redhat-ods-monitoring
- serviceMesh:
- managementState: Removed
- trustedCABundle:
- customCABundle: ""
- managementState: Managed
diff --git a/setup.RHOAI-v2.19/mlbatch-edit-role.yaml b/setup.RHOAI-v2.19/mlbatch-edit-role.yaml
deleted file mode 100644
index fd86cc6..0000000
--- a/setup.RHOAI-v2.19/mlbatch-edit-role.yaml
+++ /dev/null
@@ -1,151 +0,0 @@
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
- name: mlbatch-edit
-rules:
-- apiGroups:
- - ""
- resources:
- - pods
- verbs:
- - delete
- - get
- - list
- - watch
-- apiGroups:
- - apps
- resources:
- - deployments
- - statefulsets
- verbs:
- - delete
- - get
- - list
- - watch
-- apiGroups:
- - ""
- resources:
- - services
- - secrets
- - configmaps
- - persistentvolumeclaims
- verbs:
- - create
- - delete
- - get
- - list
- - patch
- - update
- - watch
-- apiGroups:
- - kueue.x-k8s.io
- resources:
- - "*"
- verbs:
- - get
- - list
- - watch
-- apiGroups:
- - kubeflow.org
- resources:
- - pytorchjobs
- verbs:
- - create
- - delete
- - get
- - list
- - patch
- - update
- - watch
-- apiGroups:
- - ray.io
- resources:
- - rayjobs
- - rayclusters
- verbs:
- - create
- - delete
- - get
- - list
- - patch
- - update
- - watch
-- apiGroups:
- - batch
- resources:
- - jobs
- verbs:
- - delete
- - get
- - list
- - watch
-- apiGroups:
- - workload.codeflare.dev
- resources:
- - appwrappers
- verbs:
- - create
- - delete
- - get
- - list
- - patch
- - update
- - watch
-- apiGroups:
- - scheduling.k8s.io
- resources:
- - priorityclasses
- verbs:
- - get
- - list
- - watch
-- apiGroups:
- - scheduling.x-k8s.io
- resources:
- - podgroups
- verbs:
- - create
- - delete
- - get
- - list
- - patch
- - update
- - watch
-- apiGroups:
- - ""
- resources:
- - events
- verbs:
- - get
- - list
- - watch
-- apiGroups:
- - ""
- resources:
- - namespaces
- - pods/log
- verbs:
- - get
-- apiGroups:
- - ""
- resources:
- - pods/exec
- - pods/portforward
- verbs:
- - create
-- apiGroups:
- - route.openshift.io
- resources:
- - routes
- verbs:
- - get
- - list
- - watch
- - delete
-- apiGroups:
- - ""
- - project.openshift.io
- resources:
- - projects
- verbs:
- - get
diff --git a/setup.RHOAI-v2.19/mlbatch-priorities.yaml b/setup.RHOAI-v2.19/mlbatch-priorities.yaml
deleted file mode 100644
index 77c8f3b..0000000
--- a/setup.RHOAI-v2.19/mlbatch-priorities.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-apiVersion: scheduling.k8s.io/v1
-kind: PriorityClass
-metadata:
- name: low-priority
-value: 1
-preemptionPolicy: PreemptLowerPriority
-globalDefault: false
-description: "This is the priority class for all lower priority jobs."
----
-apiVersion: scheduling.k8s.io/v1
-kind: PriorityClass
-metadata:
- name: default-priority
-value: 5
-preemptionPolicy: PreemptLowerPriority
-globalDefault: true
-description: "This is the priority class for all jobs (default priority)."
----
-apiVersion: scheduling.k8s.io/v1
-kind: PriorityClass
-metadata:
- name: high-priority
-value: 10
-preemptionPolicy: PreemptLowerPriority
-globalDefault: false
-description: "This is the priority class defined for highly important jobs that would evict lower and default priority jobs."
diff --git a/setup.RHOAI-v2.19/mlbatch-subscription.yaml b/setup.RHOAI-v2.19/mlbatch-subscription.yaml
deleted file mode 100644
index e667279..0000000
--- a/setup.RHOAI-v2.19/mlbatch-subscription.yaml
+++ /dev/null
@@ -1,310 +0,0 @@
-apiVersion: v1
-kind: Namespace
-metadata:
- name: redhat-ods-operator
----
-apiVersion: v1
-kind: Namespace
-metadata:
- name: redhat-ods-applications
----
-apiVersion: operators.coreos.com/v1
-kind: OperatorGroup
-metadata:
- name: rhods-operator
- namespace: redhat-ods-operator
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
- name: mlbatch-codeflare
- namespace: redhat-ods-operator
-data:
- manager.yaml: |
- apiVersion: apps/v1
- kind: Deployment
- metadata:
- name: manager
- namespace: system
- spec:
- selector:
- matchLabels:
- app.kubernetes.io/name: codeflare-operator
- app.kubernetes.io/part-of: codeflare
- replicas: 1
- template:
- metadata:
- annotations:
- kubectl.kubernetes.io/default-container: manager
- labels:
- app.kubernetes.io/name: codeflare-operator
- app.kubernetes.io/part-of: codeflare
- spec:
- priorityClassName: system-node-critical
- securityContext:
- runAsNonRoot: true
- # TODO(user): For common cases that do not require escalating privileges
- # it is recommended to ensure that all your Pods/Containers are restrictive.
- # More info: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted
- # Please uncomment the following code if your project does NOT have to work on old Kubernetes
- # versions < 1.20 or on vendors versions which do NOT support this field by default (i.e. Openshift < 4.11 ).
- # seccompProfile:
- # type: RuntimeDefault
- containers:
- - command:
- - /manager
- image: $(codeflare_operator_controller_image)
- imagePullPolicy: Always
- name: manager
- securityContext:
- allowPrivilegeEscalation: false
- capabilities:
- drop:
- - "ALL"
- env:
- - name: NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: metadata.namespace
- ports:
- - containerPort: 8080
- protocol: TCP
- name: metrics
- livenessProbe:
- httpGet:
- path: /healthz
- port: 8081
- initialDelaySeconds: 15
- periodSeconds: 20
- readinessProbe:
- httpGet:
- path: /readyz
- port: 8081
- initialDelaySeconds: 5
- periodSeconds: 10
- resources:
- limits:
- cpu: "1"
- memory: 1Gi
- requests:
- cpu: "1"
- memory: 1Gi
- serviceAccountName: controller-manager
- terminationGracePeriodSeconds: 10
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
- name: codeflare-operator-config
- namespace: redhat-ods-applications
-data:
- config.yaml: |
- appwrapper:
- enabled: true
- Config:
- autopilot:
- injectAntiAffinities: true
- monitorNodes: true
- resourceTaints:
- nvidia.com/gpu:
- - key: autopilot.ibm.com/gpuhealth
- value: ERR
- effect: NoSchedule
- - key: autopilot.ibm.com/gpuhealth
- value: TESTING
- effect: NoSchedule
- - key: autopilot.ibm.com/gpuhealth
- value: EVICT
- effect: NoExecute
- defaultQueueName: default-queue
- enableKueueIntegrations: true
- kueueJobReconciller:
- manageJobsWithoutQueueName: true
- waitForPodsReady:
- blockAdmission: false
- enable: false
- schedulerName: scheduler-plugins-scheduler
- slackQueueName: slack-cluster-queue
- userRBACAdmissionCheck: false
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
- name: mlbatch-kuberay
- namespace: redhat-ods-operator
-data:
- kuberay-operator-image-patch.yaml: |
- apiVersion: apps/v1
- kind: Deployment
- metadata:
- name: kuberay-operator
- spec:
- template:
- spec:
- priorityClassName: system-node-critical
- containers:
- - name: kuberay-operator
- image: $(image)
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
- name: mlbatch-kueue
- namespace: redhat-ods-operator
-data:
- controller_manager_config.yaml: |
- apiVersion: config.kueue.x-k8s.io/v1beta1
- kind: Configuration
- health:
- healthProbeBindAddress: :8081
- metrics:
- bindAddress: :8080
- enableClusterQueueResources: true
- webhook:
- port: 9443
- leaderElection:
- leaderElect: true
- resourceName: c1f6bfd2.kueue.x-k8s.io
- controller:
- groupKindConcurrency:
- Job.batch: 5
- Pod: 5
- Workload.kueue.x-k8s.io: 5
- LocalQueue.kueue.x-k8s.io: 1
- ClusterQueue.kueue.x-k8s.io: 1
- ResourceFlavor.kueue.x-k8s.io: 1
- clientConnection:
- qps: 50
- burst: 100
- #pprofBindAddress: :8082
- waitForPodsReady:
- enable: false
- blockAdmission: false
- manageJobsWithoutQueueName: true
- #internalCertManagement:
- # enable: false
- # webhookServiceName: ""
- # webhookSecretName: ""
- integrations:
- frameworks:
- # - "batch/job"
- - "kubeflow.org/mpijob"
- - "ray.io/rayjob"
- - "ray.io/raycluster"
- - "jobset.x-k8s.io/jobset"
- - "kubeflow.org/mxjob"
- - "kubeflow.org/paddlejob"
- - "kubeflow.org/pytorchjob"
- - "kubeflow.org/tfjob"
- - "kubeflow.org/xgboostjob"
- # - "pod"
- externalFrameworks:
- - "AppWrapper.v1beta2.workload.codeflare.dev"
- # podOptions:
- # namespaceSelector:
- # matchExpressions:
- # - key: kubernetes.io/metadata.name
- # operator: NotIn
- # values: [ kube-system, kueue-system ]
- fairSharing:
- enable: true
- preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare]
- manager_config_patch.yaml: |
- apiVersion: apps/v1
- kind: Deployment
- metadata:
- name: controller-manager
- namespace: system
- spec:
- template:
- spec:
- priorityClassName: system-node-critical
- containers:
- - name: manager
- image: $(image)
- args:
- - "--config=/controller_manager_config.yaml"
- - "--zap-log-level=2"
- - "--feature-gates=LendingLimit=true"
- volumeMounts:
- - name: manager-config
- mountPath: /controller_manager_config.yaml
- subPath: controller_manager_config.yaml
- volumes:
- - name: manager-config
- configMap:
- name: manager-config
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
- name: mlbatch-training-operator
- namespace: redhat-ods-operator
-data:
- manager_config_patch.yaml: |
- apiVersion: apps/v1
- kind: Deployment
- metadata:
- name: training-operator
- spec:
- template:
- spec:
- priorityClassName: system-node-critical
- containers:
- - name: training-operator
- image: $(image)
- args:
- - "--zap-log-level=2"
- - "--gang-scheduler-name=scheduler-plugins-scheduler"
- resources:
- requests:
- cpu: 100m
- memory: 100Mi
- limits:
- cpu: 500m
- memory: 1000Mi
----
-apiVersion: operators.coreos.com/v1alpha1
-kind: Subscription
-metadata:
- name: rhods-operator
- namespace: redhat-ods-operator
-spec:
- channel: stable
- installPlanApproval: Manual
- name: rhods-operator
- source: redhat-operators
- sourceNamespace: openshift-marketplace
- startingCSV: rhods-operator.2.19.0
- config:
- env:
- - name: "DISABLE_DSC_CONFIG"
- volumeMounts:
- - name: mlbatch-codeflare
- mountPath: /opt/manifests/codeflare/manager/manager.yaml
- subPath: manager.yaml
- - name: mlbatch-kuberay
- mountPath: /opt/manifests/ray/openshift/kuberay-operator-image-patch.yaml
- subPath: kuberay-operator-image-patch.yaml
- - name: mlbatch-kueue
- mountPath: /opt/manifests/kueue/components/manager/controller_manager_config.yaml
- subPath: controller_manager_config.yaml
- - name: mlbatch-kueue
- mountPath: /opt/manifests/kueue/rhoai/manager_config_patch.yaml
- subPath: manager_config_patch.yaml
- - name: mlbatch-training-operator
- mountPath: /opt/manifests/trainingoperator/rhoai/manager_config_patch.yaml
- subPath: manager_config_patch.yaml
- volumes:
- - name: mlbatch-codeflare
- configMap:
- name: mlbatch-codeflare
- - name: mlbatch-kuberay
- configMap:
- name: mlbatch-kuberay
- - name: mlbatch-kueue
- configMap:
- name: mlbatch-kueue
- - name: mlbatch-training-operator
- configMap:
- name: mlbatch-training-operator
diff --git a/setup.RHOAI-v2.19/scheduler-priority-patch.yaml b/setup.RHOAI-v2.19/scheduler-priority-patch.yaml
deleted file mode 100644
index 278802f..0000000
--- a/setup.RHOAI-v2.19/scheduler-priority-patch.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-- op: add
- path: /spec/template/spec/priorityClassName
- value: system-node-critical
diff --git a/setup.k8s/CLUSTER-SETUP.md b/setup.k8s/CLUSTER-SETUP.md
deleted file mode 100644
index 9ce72c1..0000000
--- a/setup.k8s/CLUSTER-SETUP.md
+++ /dev/null
@@ -1,193 +0,0 @@
-# Cluster Setup
-
-The cluster setup installs and configures the following components:
-+ Scheduler Plugins
-+ Kubeflow Training Operator
-+ KubeRay
-+ Kueue
-+ AppWrappers
-+ Cluster roles and priority classes
-+ Autopilot
-
-## Priorities
-
-Create `default-priority`, `high-priority`, and `low-priority` priority classes:
-```sh
-kubectl apply -f setup.k8s/mlbatch-priorities.yaml
-```
-
-## Scheduler Configuration
-
-MLBatch configures Kubernetes scheduling to accomplish two objectives:
-+ Obtaining gang (all or nothing) scheduling for multi-Pod workloads.
-+ Packing Pods whose GPU request is less than the number of GPUs on a Node to
- maximize the number of Nodes available for Pods that request all the GPUs on a Node.
-
-The currently recommended way to do this is by installing the Coscheduling out-of-tree scheduler
-plugin and configuring the default NodeResourcesFit scheduler plugin to pack in the GPU dimension.
-Alternatively, you can skip the helm install and patch commands shown below and instead install
-the experimental Sakkara scheduler plugin (described next).
-
-
-```sh
-helm install scheduler-plugins --namespace scheduler-plugins --create-namespace \
- scheduler-plugins/manifests/install/charts/as-a-second-scheduler/ \
- --set-json pluginConfig='[{"args":{"scoringStrategy":{"resources":[{"name":"nvidia.com/gpu","weight":1}],"requestedToCapacityRatio":{"shape":[{"utilization":0,"score":0},{"utilization":100,"score":10}]},"type":"RequestedToCapacityRatio"}},"name":"NodeResourcesFit"},{"args":{"permitWaitingTimeSeconds":300},"name":"Coscheduling"}]'
-```
-Patch scheduler-plugins pod priorities:
-```sh
-kubectl patch deployment -n scheduler-plugins --type=json --patch-file setup.k8s/scheduler-priority-patch.yaml scheduler-plugins-controller
-kubectl patch deployment -n scheduler-plugins --type=json --patch-file setup.k8s/scheduler-priority-patch.yaml scheduler-plugins-scheduler
-```
-
-### Sakkara
-
-[Sakkara](https://github.com/atantawi/scheduler-plugins/tree/sakkara) is an experimental
-new scheduler plugin with advanced support for topology-aware scheduling.
-
-Install Sakkara as a secondary scheduler:
-```sh
-helm install sakkara-scheduler --namespace sakkara-scheduler --create-namespace mlbatch/sakkara-scheduler
-```
-Optionally, create a config map capturing your cluster's topology as described in the [Sakkara documentation](https://github.com/atantawi/sakkara-deploy/tree/main?tab=readme-ov-file#cluster-topology). This step is recommended for production clusters. If the config map is not present, Sakkara defaults to a single-level hierarchy containing the Nodes of the cluster.
-
-## Install Operators
-
-Create the mlbatch-system namespace
-```sh
-kubectl create namespace mlbatch-system
-```
-
-Install the Kubeflow Training Operator
-
-If you are using Coscheduling do:
-```sh
-kubectl apply --server-side -k setup.k8s/training-operator/coscheduling
-```
-If you are using Sakkara do:
-```sh
-kubectl apply --server-side -k setup.k8s/training-operator/sakkara
-```
-
-Install the KubeRay Operator
-```sh
-kubectl apply --server-side -k setup.k8s/kuberay
-```
-
-Install Kueue
-```sh
-kubectl apply --server-side -k setup.k8s/kueue
-```
-
-Install the AppWrapper Operator
-
-If you are using Coscheduling do:
-```sh
-kubectl apply --server-side -k setup.k8s/appwrapper/coscheduling
-```
-If you are using Sakkara do:
-```sh
-kubectl apply --server-side -k setup.k8s/appwrapper/sakkara
-```
-
-The provided configuration differs from the default configuration of the
-operators as follows:
-- Kubeflow Training Operator:
- - `gang-scheduler-name` is set to either `scheduler-plugins-scheduler` or `sakkara-scheduler`,
-- Kueue:
- - `batch/job` integration is disabled,
- - `manageJobsWithoutQueueName` is enabled and configured via `managedJobsNamespaceSelector` to be
- scoped to only namespaces that are labeled with `mlbatch-team-namespace=true`.
- - `waitForPodsReady` is disabled,
- - `LendingLimit` feature gate is enabled,
- - `fairSharing` is enabled,
- - `enableClusterQueueResources` metrics is enabled,
-- AppWrapper operator:
- - `userRBACAdmissionCheck` is disabled,
- - `schedulerName` is set to `scheduler-plugins-scheduler` or `sakkara-scheduler`,
- - `queueName` is set to `default-queue`,
-- pod priorities, resource requests and limits have been adjusted.
-
-## Autopilot
-
-Helm charts values and how-to for customization can be found [in the official documentation](https://github.com/IBM/autopilot/blob/main/helm-charts/autopilot/README.md). As-is, Autopilot will run on GPU nodes.
-
-- Add the Autopilot Helm repository
-
-```bash
-helm repo add autopilot https://ibm.github.io/autopilot/
-helm repo update
-```
-
-- Install the chart (the command is idempotent). The config file customizes the Helm values and is optional.
-
-```bash
-helm upgrade autopilot autopilot/autopilot --install --namespace=autopilot --create-namespace -f your-config.yml
-```
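-
-To confirm the installation, check that the Autopilot pods are running on the
-GPU nodes; Autopilot runs as a DaemonSet on GPU nodes, so you should see one
-pod per GPU node:
-```bash
-kubectl get pods -n autopilot -o wide
-```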
-
-### Enabling Prometheus metrics
-
-The `ServiceMonitor` object is what enables Prometheus to scrape the metrics produced by Autopilot.
-For Prometheus to discover it, Autopilot's `ServiceMonitor` must be labeled with the name of your Prometheus release. This is usually `prometheus`, which is the default set in the Autopilot release.
-If that is not the case in your cluster, the correct release label can be found on the `ServiceMonitor` of Prometheus itself, or in the name of the Prometheus helm chart.
-Then label Autopilot's `ServiceMonitor` with the following command, filling in the release name after `release=`:
-
-```bash
-kubectl label servicemonitors.monitoring.coreos.com -n autopilot autopilot-metrics-monitor release= --overwrite
-```
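-
-If you are unsure of the release name, one way to find it is to inspect the
-labels on Prometheus' own `ServiceMonitor` objects (this assumes Prometheus
-runs in the `monitoring` namespace; adjust as needed):
-```bash
-kubectl get servicemonitors.monitoring.coreos.com -n monitoring --show-labels
-```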
-
-## Kueue Configuration
-
-Create Kueue's default flavor:
-```sh
-kubectl apply -f setup.k8s/default-flavor.yaml
-```
-
-## Cluster Role
-
-Create `mlbatch-edit` role:
-```sh
-kubectl apply -f setup.k8s/mlbatch-edit-role.yaml
-```
-
-## Slack Cluster Queue
-
-Create the designated slack `ClusterQueue`, which will be used to automate
-minor adjustments to cluster capacity caused by node failures and
-scheduler maintenance.
-```sh
-kubectl apply -f- << EOF
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: ClusterQueue
-metadata:
- name: slack-cluster-queue
-spec:
- namespaceSelector: {}
- cohort: default-cohort
- preemption:
- withinClusterQueue: LowerOrNewerEqualPriority
- reclaimWithinCohort: Any
- borrowWithinCohort:
- policy: Never
- resourceGroups:
- - coveredResources: ["cpu", "memory", "nvidia.com/gpu", "nvidia.com/roce_gdr", "pods"]
- flavors:
- - name: default-flavor
- resources:
- - name: "cpu"
- nominalQuota: 8000m
- - name: "memory"
- nominalQuota: 128Gi
- - name: "nvidia.com/gpu"
- nominalQuota: 8
- - name: "nvidia.com/roce_gdr"
- nominalQuota: 1
- - name: "pods"
- nominalQuota: 100
-EOF
-```
-Edit the above quantities to adjust the quota to the desired
-values. Pod counts are optional and can be omitted from the list of
-covered resources. The `lendingLimit` for each resource will be
-dynamically adjusted by the MLBatch system to reflect reduced cluster
-capacity. See [QUOTA_MAINTENANCE.md](../QUOTA_MAINTENANCE.md) for a
-detailed discussion of the role of the slack `ClusterQueue`.
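-
-You can inspect the slack queue at any time to see the configured quota and any
-lending limits the MLBatch system has applied:
-```sh
-kubectl get clusterqueue slack-cluster-queue -o yaml
-```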
diff --git a/setup.k8s/TEAM-SETUP.md b/setup.k8s/TEAM-SETUP.md
deleted file mode 100644
index 3f1fc38..0000000
--- a/setup.k8s/TEAM-SETUP.md
+++ /dev/null
@@ -1,97 +0,0 @@
-# Team Setup
-
-A *team* in MLBatch is a group of users that share a resource quota.
-
-Before setting up your teams and quotas, please read [QUOTA_MAINTENANCE.md](../QUOTA_MAINTENANCE.md)
-for a discussion of our recommended best practices.
-
-Setting up a new team requires the cluster admin to create a namespace,
-a quota, a queue, and the required role bindings as described below.
-
-Create and label the namespace:
-```sh
-kubectl create namespace team1
-kubectl label namespace team1 'mlbatch-team-namespace=true'
-```
-
-For each user on the team, create a RoleBinding:
-```sh
-kubectl -n team1 apply -f- << EOF
-kind: RoleBinding
-apiVersion: rbac.authorization.k8s.io/v1
-metadata:
- name: user-one
-subjects:
- - kind: User
- apiGroup: rbac.authorization.k8s.io
- name: user-one
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: mlbatch-edit
-EOF
-```
-
-Specify the intended quota for the namespace by creating a `ClusterQueue`:
-```sh
-kubectl apply -f- << EOF
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: ClusterQueue
-metadata:
- name: team1-cluster-queue
-spec:
- namespaceSelector: {}
- cohort: default-cohort
- preemption:
- withinClusterQueue: LowerOrNewerEqualPriority
- reclaimWithinCohort: Any
- borrowWithinCohort:
- policy: Never
- resourceGroups:
- - coveredResources: ["cpu", "memory", "nvidia.com/gpu", "nvidia.com/roce_gdr", "pods"]
- flavors:
- - name: default-flavor
- resources:
- - name: "cpu"
- nominalQuota: 8000m
- # borrowingLimit: 0
- # lendingLimit: 0
- - name: "memory"
- nominalQuota: 128Gi
- # borrowingLimit: 0
- # lendingLimit: 0
- - name: "nvidia.com/gpu"
- nominalQuota: 16
- # borrowingLimit: 0
- # lendingLimit: 0
- - name: "nvidia.com/roce_gdr"
- nominalQuota: 4
- # borrowingLimit: 0
- # lendingLimit: 0
- - name: "pods"
- nominalQuota: 100
- # borrowingLimit: 0
- # lendingLimit: 0
-EOF
-```
-Edit the above quantities to adjust the quota to the desired values. Pod counts
-are optional and can be omitted from the list of covered resources.
-
-Uncomment all `borrowingLimit` lines to prevent this namespace from borrowing
-quota from other namespaces. Uncomment all `lendingLimit` lines to prevent other
-namespaces from borrowing quota from this namespace.
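-
-For example, with both limits uncommented, the GPU entry would look like the
-fragment below (the quota value is illustrative):
-```yaml
-    - name: "nvidia.com/gpu"
-      nominalQuota: 16
-      borrowingLimit: 0  # this team cannot borrow unused quota from the cohort
-      lendingLimit: 0    # other teams cannot borrow this team's unused quota
-```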
-
-Create a `LocalQueue` to bind the `ClusterQueue` to the namespace:
-```sh
-kubectl apply -n team1 -f- << EOF
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: LocalQueue
-metadata:
- name: default-queue
-spec:
- clusterQueue: team1-cluster-queue
-EOF
-```
-We recommend naming the local queue `default-queue` as `AppWrappers` will
-default to this queue name.
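-
-Once the `LocalQueue` exists, you can verify that it is bound to the cluster
-queue; the output shows the backing cluster queue along with pending and
-admitted workload counts:
-```sh
-kubectl get localqueue -n team1 default-queue
-```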
-
diff --git a/setup.k8s/UNINSTALL.md b/setup.k8s/UNINSTALL.md
deleted file mode 100644
index 70ec929..0000000
--- a/setup.k8s/UNINSTALL.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# Uninstall
-
-***First, remove all team namespaces and corresponding cluster queues.***
-
-Then to uninstall the MLBatch controllers and reclaim the corresponding
-namespaces, do the following:
-```sh
-# Delete operators and CRDs
-kubectl delete -k setup.k8s/appwrapper/base
-kubectl delete -k setup.k8s/kueue
-kubectl delete -k setup.k8s/kuberay
-kubectl delete -k setup.k8s/training-operator/base
-
-# Delete namespace
-kubectl delete namespace mlbatch-system
-
-# Delete cluster role
-kubectl delete clusterrole mlbatch-edit
-
-# Coscheduler uninstall
-helm uninstall -n scheduler-plugins scheduler-plugins
-kubectl delete namespace scheduler-plugins
-
-# Sakkara uninstall
-helm uninstall -n sakkara-scheduler sakkara-scheduler
-kubectl delete namespace sakkara-scheduler
-```
diff --git a/setup.k8s/appwrapper/base/kustomization.yaml b/setup.k8s/appwrapper/base/kustomization.yaml
deleted file mode 100644
index 440f25b..0000000
--- a/setup.k8s/appwrapper/base/kustomization.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-apiVersion: kustomize.config.k8s.io/v1beta1
-kind: Kustomization
-
-namespace: mlbatch-system
-
-resources:
-- "https://github.com/project-codeflare/appwrapper/config/default?ref=v0.30.0"
-
-labels:
-- pairs:
- app.kubernetes.io/name: appwrapper
- app.kubernetes.io/component: controller
- includeSelectors: true
-
-images:
-- name: quay.io/ibm/appwrapper
- newTag: v0.30.0
-
-patches:
-- path: manager_resources_patch.yaml
-- path: remove_default_namespace.yaml
diff --git a/setup.k8s/appwrapper/base/manager_resources_patch.yaml b/setup.k8s/appwrapper/base/manager_resources_patch.yaml
deleted file mode 100644
index 1b26c3c..0000000
--- a/setup.k8s/appwrapper/base/manager_resources_patch.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: controller-manager
- namespace: system
-spec:
- template:
- spec:
- priorityClassName: system-node-critical
- containers:
- - name: manager
- resources:
- requests:
- cpu: 250m
- memory: 250Mi
- limits:
- cpu: 1000m
- memory: 1000Mi
diff --git a/setup.k8s/appwrapper/base/remove_default_namespace.yaml b/setup.k8s/appwrapper/base/remove_default_namespace.yaml
deleted file mode 100644
index b63fb95..0000000
--- a/setup.k8s/appwrapper/base/remove_default_namespace.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-$patch: delete
-apiVersion: v1
-kind: Namespace
-metadata:
- name: appwrapper-system
diff --git a/setup.k8s/appwrapper/coscheduling/config_patch.yaml b/setup.k8s/appwrapper/coscheduling/config_patch.yaml
deleted file mode 100644
index 6e1b592..0000000
--- a/setup.k8s/appwrapper/coscheduling/config_patch.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-kind: ConfigMap
-apiVersion: v1
-metadata:
- name: appwrapper-operator-config
- namespace: appwrapper-system
-data:
- config.yaml: |
- appwrapper:
- enableKueueIntegrations: true
- kueueJobReconciller:
- manageJobsWithoutQueueName: true
- waitForPodsReady:
- enable: false
- defaultQueueName: default-queue
- schedulerName: scheduler-plugins-scheduler
- slackQueueName: slack-cluster-queue
- userRBACAdmissionCheck: false
- controllerManager:
- health:
- bindAddress: ":8081"
- metrics:
- bindAddress: "127.0.0.1:8080"
- leaderElection: true
diff --git a/setup.k8s/appwrapper/coscheduling/kustomization.yaml b/setup.k8s/appwrapper/coscheduling/kustomization.yaml
deleted file mode 100644
index c651d8a..0000000
--- a/setup.k8s/appwrapper/coscheduling/kustomization.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-apiVersion: kustomize.config.k8s.io/v1beta1
-kind: Kustomization
-namespace: mlbatch-system
-
-resources:
-- ../base
-
-patches:
-- path: config_patch.yaml
diff --git a/setup.k8s/appwrapper/sakkara/config_patch.yaml b/setup.k8s/appwrapper/sakkara/config_patch.yaml
deleted file mode 100644
index f657b58..0000000
--- a/setup.k8s/appwrapper/sakkara/config_patch.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-kind: ConfigMap
-apiVersion: v1
-metadata:
- name: appwrapper-operator-config
- namespace: appwrapper-system
-data:
- config.yaml: |
- appwrapper:
- enableKueueIntegrations: true
- kueueJobReconciller:
- manageJobsWithoutQueueName: true
- waitForPodsReady:
- enable: false
- defaultQueueName: default-queue
- schedulerName: sakkara-scheduler
- slackQueueName: slack-cluster-queue
- userRBACAdmissionCheck: false
- controllerManager:
- health:
- bindAddress: ":8081"
- metrics:
- bindAddress: "127.0.0.1:8080"
- leaderElection: true
diff --git a/setup.k8s/appwrapper/sakkara/kustomization.yaml b/setup.k8s/appwrapper/sakkara/kustomization.yaml
deleted file mode 100644
index c651d8a..0000000
--- a/setup.k8s/appwrapper/sakkara/kustomization.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-apiVersion: kustomize.config.k8s.io/v1beta1
-kind: Kustomization
-namespace: mlbatch-system
-
-resources:
-- ../base
-
-patches:
-- path: config_patch.yaml
diff --git a/setup.k8s/default-flavor.yaml b/setup.k8s/default-flavor.yaml
deleted file mode 100644
index 6cbccf3..0000000
--- a/setup.k8s/default-flavor.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: ResourceFlavor
-metadata:
- name: default-flavor
diff --git a/setup.k8s/kind/kind-config.yaml b/setup.k8s/kind/kind-config.yaml
deleted file mode 100644
index f5d7a9e..0000000
--- a/setup.k8s/kind/kind-config.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-# this config file contains all config fields with comments
-kind: Cluster
-apiVersion: kind.x-k8s.io/v1alpha4
-# 1 control plane node and 1 worker node
-nodes:
-# the control plane node config
-- role: control-plane
- # kubernetes version 1.27.17 from kind v0.24.0
- image: kindest/node:v1.27.17@sha256:3fd82731af34efe19cd54ea5c25e882985bafa2c9baefe14f8deab1737d9fabe
-# the worker
-- role: worker
- # kubernetes version 1.27.17 from kind v0.24.0
- image: kindest/node:v1.27.17@sha256:3fd82731af34efe19cd54ea5c25e882985bafa2c9baefe14f8deab1737d9fabe
diff --git a/setup.k8s/kuberay/kustomization.yaml b/setup.k8s/kuberay/kustomization.yaml
deleted file mode 100644
index 0161395..0000000
--- a/setup.k8s/kuberay/kustomization.yaml
+++ /dev/null
@@ -1,17 +0,0 @@
-apiVersion: kustomize.config.k8s.io/v1beta1
-kind: Kustomization
-
-namespace: mlbatch-system
-
-resources:
-- "https://github.com/ray-project/kuberay/ray-operator/config/default?ref=v1.1.0"
-
-labels:
-- pairs:
- app.kubernetes.io/name: kuberay
- app.kubernetes.io/component: controller
- includeSelectors: true
-
-patches:
-- path: remove_default_namespace.yaml
-- path: manager_resources_patch.yaml
diff --git a/setup.k8s/kuberay/manager_resources_patch.yaml b/setup.k8s/kuberay/manager_resources_patch.yaml
deleted file mode 100644
index 7bb80d9..0000000
--- a/setup.k8s/kuberay/manager_resources_patch.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: kuberay-operator
- namespace: system
-spec:
- template:
- spec:
- priorityClassName: system-node-critical
- containers:
- - name: kuberay-operator
- args:
- - "--zap-log-level=2"
- resources:
- requests:
- cpu: 100m
- memory: 100Mi
- limits:
- cpu: 500m
- memory: 1000Mi
diff --git a/setup.k8s/kuberay/remove_default_namespace.yaml b/setup.k8s/kuberay/remove_default_namespace.yaml
deleted file mode 100644
index b5977cc..0000000
--- a/setup.k8s/kuberay/remove_default_namespace.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-$patch: delete
-apiVersion: v1
-kind: Namespace
-metadata:
- name: ray-system
diff --git a/setup.k8s/kueue/controller_manager_config.yaml b/setup.k8s/kueue/controller_manager_config.yaml
deleted file mode 100644
index 0f395ac..0000000
--- a/setup.k8s/kueue/controller_manager_config.yaml
+++ /dev/null
@@ -1,76 +0,0 @@
-apiVersion: config.kueue.x-k8s.io/v1beta1
-kind: Configuration
-health:
- healthProbeBindAddress: :8081
-metrics:
- bindAddress: :8080
- enableClusterQueueResources: true
-webhook:
- port: 9443
-leaderElection:
- leaderElect: true
- resourceName: c1f6bfd2.kueue.x-k8s.io
-controller:
- groupKindConcurrency:
-# Job.batch: 5
- Pod: 5
- Workload.kueue.x-k8s.io: 5
- LocalQueue.kueue.x-k8s.io: 1
- Cohort.kueue.x-k8s.io: 1
- ClusterQueue.kueue.x-k8s.io: 1
- ResourceFlavor.kueue.x-k8s.io: 1
-clientConnection:
- qps: 50
- burst: 100
-#pprofBindAddress: :8083
-waitForPodsReady:
- enable: false
-# timeout: 5m
-# blockAdmission: false
-# requeuingStrategy:
-# timestamp: Eviction
-# backoffLimitCount: null # null indicates infinite requeuing
-# backoffBaseSeconds: 60
-# backoffMaxSeconds: 3600
-manageJobsWithoutQueueName: true
-managedJobsNamespaceSelector:
- matchLabels:
- mlbatch-team-namespace: "true"
-#internalCertManagement:
-# enable: false
-# webhookServiceName: ""
-# webhookSecretName: ""
-integrations:
- frameworks:
-# - "batch/job"
- - "kubeflow.org/mpijob"
- - "ray.io/rayjob"
- - "ray.io/raycluster"
- - "jobset.x-k8s.io/jobset"
- - "kubeflow.org/mxjob"
- - "kubeflow.org/paddlejob"
- - "kubeflow.org/pytorchjob"
- - "kubeflow.org/tfjob"
- - "kubeflow.org/xgboostjob"
-# - "pod"
-# - "deployment" # requires enabling pod integration
-# - "statefulset" # requires enabling pod integration
- externalFrameworks:
- - "AppWrapper.v1beta2.workload.codeflare.dev"
-# podOptions:
-# namespaceSelector:
-# matchExpressions:
-# - key: kubernetes.io/metadata.name
-# operator: NotIn
-# values: [ kube-system, kueue-system ]
-fairSharing:
- enable: true
- preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare]
-#resources:
-# excludeResourcePrefixes: []
-# transformations:
-# - input: nvidia.com/mig-4g.5gb
-# strategy: Replace | Retain
-# outputs:
-# example.com/accelerator-memory: 5Gi
-# example.com/accelerator-gpc: 4
diff --git a/setup.k8s/kueue/kustomization.yaml b/setup.k8s/kueue/kustomization.yaml
deleted file mode 100644
index 5b7004c..0000000
--- a/setup.k8s/kueue/kustomization.yaml
+++ /dev/null
@@ -1,53 +0,0 @@
-apiVersion: kustomize.config.k8s.io/v1beta1
-kind: Kustomization
-
-namespace: mlbatch-system
-
-resources:
-- "https://github.com/kubernetes-sigs/kueue/config/default?ref=v0.10.2"
-
-labels:
-- pairs:
- app.kubernetes.io/name: kueue
- app.kubernetes.io/component: controller
- includeSelectors: true
-
-configMapGenerator:
-- name: manager-config
- namespace: kueue-system
- behavior: replace
- files:
- - controller_manager_config.yaml
-
-images:
-- name: us-central1-docker.pkg.dev/k8s-staging-images/kueue/kueue
- newName: registry.k8s.io/kueue/kueue
- newTag: v0.10.2
-
-patches:
-- path: manager_resources_patch.yaml
-- path: mutating_webhook_patch.yaml
-- path: remove_default_namespace.yaml
-- path: validating_webhook_patch.yaml
-- target:
- kind: ClusterRole
- name: manager-role
- patch: |
- - op: add
- path: /rules/-
- value:
- apiGroups:
- - workload.codeflare.dev
- resources:
- - appwrappers
- verbs:
- - get
- - list
- - watch
-- target:
- kind: Deployment
- name: controller-manager
- patch: |
- - op: add
- path: /spec/template/spec/containers/0/args/-
- value: "--feature-gates=LendingLimit=true"
diff --git a/setup.k8s/kueue/manager_resources_patch.yaml b/setup.k8s/kueue/manager_resources_patch.yaml
deleted file mode 100644
index 5dc7501..0000000
--- a/setup.k8s/kueue/manager_resources_patch.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: controller-manager
- namespace: system
-spec:
- template:
- spec:
- priorityClassName: system-node-critical
diff --git a/setup.k8s/kueue/mutating_webhook_patch.yaml b/setup.k8s/kueue/mutating_webhook_patch.yaml
deleted file mode 100644
index 61d0e1d..0000000
--- a/setup.k8s/kueue/mutating_webhook_patch.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-apiVersion: admissionregistration.k8s.io/v1
-kind: MutatingWebhookConfiguration
-metadata:
- name: mutating-webhook-configuration
-webhooks:
- - $patch: delete
- name: mpod.kb.io
- - $patch: delete
- name: mjob.kb.io
diff --git a/setup.k8s/kueue/remove_default_namespace.yaml b/setup.k8s/kueue/remove_default_namespace.yaml
deleted file mode 100644
index 787ee88..0000000
--- a/setup.k8s/kueue/remove_default_namespace.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-$patch: delete
-apiVersion: v1
-kind: Namespace
-metadata:
- name: kueue-system
diff --git a/setup.k8s/kueue/validating_webhook_patch.yaml b/setup.k8s/kueue/validating_webhook_patch.yaml
deleted file mode 100644
index 3fe0342..0000000
--- a/setup.k8s/kueue/validating_webhook_patch.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-apiVersion: admissionregistration.k8s.io/v1
-kind: ValidatingWebhookConfiguration
-metadata:
- name: validating-webhook-configuration
-webhooks:
- - $patch: delete
- name: vpod.kb.io
- - $patch: delete
- name: vjob.kb.io
diff --git a/setup.k8s/mlbatch-edit-role.yaml b/setup.k8s/mlbatch-edit-role.yaml
deleted file mode 100644
index a3db811..0000000
--- a/setup.k8s/mlbatch-edit-role.yaml
+++ /dev/null
@@ -1,135 +0,0 @@
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
- name: mlbatch-edit
-rules:
-- apiGroups:
- - ""
- resources:
- - pods
- verbs:
- - delete
- - get
- - list
- - watch
-- apiGroups:
- - apps
- resources:
- - deployments
- - statefulsets
- verbs:
- - delete
- - get
- - list
- - watch
-- apiGroups:
- - ""
- resources:
- - services
- - secrets
- - configmaps
- - persistentvolumeclaims
- verbs:
- - create
- - delete
- - get
- - list
- - patch
- - update
- - watch
-- apiGroups:
- - kueue.x-k8s.io
- resources:
- - "*"
- verbs:
- - get
- - list
- - watch
-- apiGroups:
- - kubeflow.org
- resources:
- - pytorchjobs
- verbs:
- - create
- - delete
- - get
- - list
- - patch
- - update
- - watch
-- apiGroups:
- - ray.io
- resources:
- - rayjobs
- - rayclusters
- verbs:
- - create
- - delete
- - get
- - list
- - patch
- - update
- - watch
-- apiGroups:
- - batch
- resources:
- - jobs
- verbs:
- - delete
- - get
- - list
- - watch
-- apiGroups:
- - workload.codeflare.dev
- resources:
- - appwrappers
- verbs:
- - create
- - delete
- - get
- - list
- - patch
- - update
- - watch
-- apiGroups:
- - scheduling.k8s.io
- resources:
- - priorityclasses
- verbs:
- - get
- - list
- - watch
-- apiGroups:
- - scheduling.x-k8s.io
- resources:
- - podgroups
- verbs:
- - create
- - delete
- - get
- - list
- - patch
- - update
- - watch
-- apiGroups:
- - ""
- resources:
- - events
- verbs:
- - get
- - list
- - watch
-- apiGroups:
- - ""
- resources:
- - namespaces
- - pods/log
- verbs:
- - get
-- apiGroups:
- - ""
- resources:
- - pods/exec
- - pods/portforward
- verbs:
- - create
diff --git a/setup.k8s/mlbatch-priorities.yaml b/setup.k8s/mlbatch-priorities.yaml
deleted file mode 100644
index 77c8f3b..0000000
--- a/setup.k8s/mlbatch-priorities.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-apiVersion: scheduling.k8s.io/v1
-kind: PriorityClass
-metadata:
- name: low-priority
-value: 1
-preemptionPolicy: PreemptLowerPriority
-globalDefault: false
-description: "This is the priority class for all lower priority jobs."
----
-apiVersion: scheduling.k8s.io/v1
-kind: PriorityClass
-metadata:
- name: default-priority
-value: 5
-preemptionPolicy: PreemptLowerPriority
-globalDefault: true
-description: "This is the priority class for all jobs (default priority)."
----
-apiVersion: scheduling.k8s.io/v1
-kind: PriorityClass
-metadata:
- name: high-priority
-value: 10
-preemptionPolicy: PreemptLowerPriority
-globalDefault: false
-description: "This is the priority class defined for highly important jobs that would evict lower and default priority jobs."
diff --git a/setup.k8s/scheduler-priority-patch.yaml b/setup.k8s/scheduler-priority-patch.yaml
deleted file mode 100644
index 278802f..0000000
--- a/setup.k8s/scheduler-priority-patch.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-- op: add
- path: /spec/template/spec/priorityClassName
- value: system-node-critical
diff --git a/setup.k8s/training-operator/base/kustomization.yaml b/setup.k8s/training-operator/base/kustomization.yaml
deleted file mode 100644
index 6aa6dc2..0000000
--- a/setup.k8s/training-operator/base/kustomization.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-apiVersion: kustomize.config.k8s.io/v1beta1
-kind: Kustomization
-namespace: mlbatch-system
-
-resources:
-- "https://github.com/kubeflow/training-operator/manifests/base?ref=v1.7.0"
-
-labels:
-- pairs:
- app.kubernetes.io/name: training-operator
- app.kubernetes.io/component: controller
- includeSelectors: true
-
-images:
-- name: kubeflow/training-operator
- newTag: "v1-855e096"
-
-patches:
-- path: manager_resources_patch.yaml
diff --git a/setup.k8s/training-operator/base/manager_resources_patch.yaml b/setup.k8s/training-operator/base/manager_resources_patch.yaml
deleted file mode 100644
index 93f052b..0000000
--- a/setup.k8s/training-operator/base/manager_resources_patch.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: training-operator
-spec:
- template:
- spec:
- priorityClassName: system-node-critical
- containers:
- - name: training-operator
- args:
- - "--zap-log-level=2"
- resources:
- requests:
- cpu: 100m
- memory: 100Mi
- limits:
- cpu: 500m
- memory: 1000Mi
diff --git a/setup.k8s/training-operator/coscheduling/kustomization.yaml b/setup.k8s/training-operator/coscheduling/kustomization.yaml
deleted file mode 100644
index dc8e5ba..0000000
--- a/setup.k8s/training-operator/coscheduling/kustomization.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: kustomize.config.k8s.io/v1beta1
-kind: Kustomization
-namespace: mlbatch-system
-
-resources:
-- ../base
-
-patches:
-- target:
- kind: Deployment
- name: training-operator
- patch: |
- - op: add
- path: /spec/template/spec/containers/0/args/-
- value: "--gang-scheduler-name=scheduler-plugins-scheduler"
diff --git a/setup.k8s/training-operator/sakkara/kustomization.yaml b/setup.k8s/training-operator/sakkara/kustomization.yaml
deleted file mode 100644
index 4b40383..0000000
--- a/setup.k8s/training-operator/sakkara/kustomization.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: kustomize.config.k8s.io/v1beta1
-kind: Kustomization
-namespace: mlbatch-system
-
-resources:
-- ../base
-
-patches:
-- target:
- kind: Deployment
- name: training-operator
- patch: |
- - op: add
- path: /spec/template/spec/containers/0/args/-
- value: "--gang-scheduler-name=sakkara-scheduler"
diff --git a/setup.tmpl/CLUSTER-SETUP.md.tmpl b/setup.tmpl/CLUSTER-SETUP.md.tmpl
deleted file mode 100644
index 1cb3f8d..0000000
--- a/setup.tmpl/CLUSTER-SETUP.md.tmpl
+++ /dev/null
@@ -1,291 +0,0 @@
-# Cluster Setup
-
-{{ if .RHOAI -}}
-The cluster setup installs Red Hat OpenShift AI and configures Scheduler Plugins, Kueue,
-cluster roles, and priority classes.
-
-{{- else -}}
-The cluster setup installs and configures the following components:
-+ Scheduler Plugins
-+ Kubeflow Training Operator
-+ KubeRay
-+ Kueue
-+ AppWrappers
-+ Cluster roles and priority classes
-+ Autopilot
-
-{{- end }}
-
-## Priorities
-
-Create `default-priority`, `high-priority`, and `low-priority` priority classes:
-```sh
-{{ .KUBECTL }} apply -f setup.{{ .VERSION }}/mlbatch-priorities.yaml
-```
-
-## Scheduler Configuration
-
-MLBatch configures Kubernetes scheduling to accomplish two objectives:
-+ Obtaining gang (all or nothing) scheduling for multi-Pod workloads.
-+ Packing Pods whose GPU request is less than the number of GPUs on a Node to
- maximize the number of Nodes available for Pods that request all the GPUs on a Node.
-
-{{ if .RHOAI -}}
-This is done by installing the Coscheduling out-of-tree scheduler plugin and configuring
-the default NodeResourcesFit scheduler plugin to pack in the GPU dimension.
-{{- else -}}
-The currently recommended way to do this is by installing the Coscheduling out-of-tree scheduler
-plugin and configuring the default NodeResourcesFit scheduler plugin to pack in the GPU dimension.
-Alternatively, you can skip the helm install and patch commands shown below and instead install
-the experimental Sakkara scheduler plugin (described next).
-{{- end }}
-
-
-```sh
-helm install scheduler-plugins --namespace scheduler-plugins --create-namespace \
- scheduler-plugins/manifests/install/charts/as-a-second-scheduler/ \
- --set-json pluginConfig='[{"args":{"scoringStrategy":{"resources":[{"name":"nvidia.com/gpu","weight":1}],"requestedToCapacityRatio":{"shape":[{"utilization":0,"score":0},{"utilization":100,"score":10}]},"type":"RequestedToCapacityRatio"}},"name":"NodeResourcesFit"},{"args":{"permitWaitingTimeSeconds":300},"name":"Coscheduling"}]'
-```
-Patch scheduler-plugins pod priorities:
-```sh
-{{ .KUBECTL }} patch deployment -n scheduler-plugins --type=json --patch-file setup.{{ .VERSION }}/scheduler-priority-patch.yaml scheduler-plugins-controller
-{{ .KUBECTL }} patch deployment -n scheduler-plugins --type=json --patch-file setup.{{ .VERSION }}/scheduler-priority-patch.yaml scheduler-plugins-scheduler
-```
-
-{{ if not .RHOAI -}}
-### Sakkara
-
-[Sakkara](https://github.com/atantawi/scheduler-plugins/tree/sakkara) is an experimental
-new scheduler plugin with advanced support for topology-aware scheduling.
-
-Install Sakkara as a secondary scheduler:
-```sh
-helm install sakkara-scheduler --namespace sakkara-scheduler --create-namespace mlbatch/sakkara-scheduler
-```
-Optionally, create a config map capturing your cluster's topology as described in the [Sakkara documentation](https://github.com/atantawi/sakkara-deploy/tree/main?tab=readme-ov-file#cluster-topology). This step is recommended for production clusters. If the config map is not present, Sakkara defaults to a single-level hierarchy containing the Nodes of the cluster.
-{{- end }}
-
-{{ if .RHOAI -}}
-## Red Hat OpenShift AI
-
-Create the Red Hat OpenShift AI subscription:
-```sh
-{{ .KUBECTL }} apply -f setup.{{ .VERSION }}/mlbatch-subscription.yaml
-```
-Identify install plan:
-```sh
-{{ .KUBECTL }} get ip -n redhat-ods-operator
-```
-```
-NAMESPACE NAME CSV APPROVAL APPROVED
-redhat-ods-operator install-kmh8w rhods-operator.2.16.0 Manual false
-```
-Approve install plan replacing the generated plan name below with the actual
-value:
-```sh
-{{ .KUBECTL }} patch ip -n redhat-ods-operator --type merge --patch '{"spec":{"approved":true}}' install-kmh8w
-```
-Create DSC Initialization:
-```sh
-{{ .KUBECTL }} apply -f setup.{{ .VERSION }}/mlbatch-dsci.yaml
-```
-Create Data Science Cluster:
-```sh
-{{ .KUBECTL }} apply -f setup.{{ .VERSION }}/mlbatch-dsc.yaml
-```
-The provided DSCI and DSC are intended to install a minimal set of Red Hat OpenShift
-AI managed components: `codeflare`, `kueue`, `ray`, and `trainingoperator`. The
-remaining components such as `dashboard` can be optionally enabled.
-
-The configuration of the managed components differs from the default Red Hat OpenShift
-AI configuration as follows:
-- Kubeflow Training Operator:
- - `gang-scheduler-name` is set to `scheduler-plugins-scheduler`,
-- Kueue:
- - `manageJobsWithoutQueueName` is enabled,
- - `batch/job` integration is disabled,
- - `waitForPodsReady` is disabled,
- - `LendingLimit` feature gate is enabled,
-{{- if .FAIRSHARE }}
- - `fairSharing` is enabled,
-{{- end }}
- - `enableClusterQueueResources` metrics is enabled,
-- Codeflare operator:
- - the AppWrapper controller is enabled and configured as follows:
- - `userRBACAdmissionCheck` is disabled,
- - `schedulerName` is set to `scheduler-plugins-scheduler`,
- - `queueName` is set to `default-queue`,
- - `slackQueueName` is set to `slack-cluster-queue`
-- pod priorities, resource requests and limits have been adjusted.
-
-{{ if (eq .VERSION "RHOAI-v2.13") -}}
-To work around https://issues.redhat.com/browse/RHOAIENG-7887 (a race condition
-in Red Hat OpenShift AI installation), do a rolling restart of the Kueue manager.
-```sh
-{{ .KUBECTL }} rollout restart deployment/kueue-controller-manager -n redhat-ods-applications
-```
-
-After doing the restart, verify that you see the following lines in the
-kueue-controller-manager's log:
-```sh
-{"level":"info","ts":"2024-06-25T20:17:25.689638786Z","logger":"controller-runtime.builder","caller":"builder/webhook.go:189","msg":"Registering a validating webhook","GVK":"kubeflow.org/v1, Kind=PyTorchJob","path":"/validate-kubeflow-org-v1-pytorchjob"}
-{"level":"info","ts":"2024-06-25T20:17:25.689698615Z","logger":"controller-runtime.webhook","caller":"webhook/server.go:183","msg":"Registering webhook","path":"/validate-kubeflow-org-v1-pytorchjob"}
-{"level":"info","ts":"2024-06-25T20:17:25.689743757Z","logger":"setup","caller":"jobframework/setup.go:81","msg":"Set up controller and webhook for job framework","jobFrameworkName":"kubeflow.org/pytorchjob"}
-
-```
-{{- end }}
-{{- else -}}
-## Install Operators
-
-Create the mlbatch-system namespace
-```sh
-{{ .KUBECTL }} create namespace mlbatch-system
-```
-
-Install the Kubeflow Training Operator
-
-If you are using Coscheduling do:
-```sh
-{{ .KUBECTL }} apply --server-side -k setup.{{ .VERSION }}/training-operator/coscheduling
-```
-If you are using Sakkara do:
-```sh
-{{ .KUBECTL }} apply --server-side -k setup.{{ .VERSION }}/training-operator/sakkara
-```
-
-Install the KubeRay Operator
-```sh
-{{ .KUBECTL }} apply --server-side -k setup.{{ .VERSION }}/kuberay
-```
-
-Install Kueue
-```sh
-{{ .KUBECTL }} apply --server-side -k setup.{{ .VERSION }}/kueue
-```
-
-Install the AppWrapper Operator
-
-If you are using Coscheduling do:
-```sh
-{{ .KUBECTL }} apply --server-side -k setup.{{ .VERSION }}/appwrapper/coscheduling
-```
-If you are using Sakkara do:
-```sh
-{{ .KUBECTL }} apply --server-side -k setup.{{ .VERSION }}/appwrapper/sakkara
-```
-
-The provided configuration differs from the default configuration of the
-operators as follows:
-- Kubeflow Training Operator:
- - `gang-scheduler-name` is set to either `scheduler-plugins-scheduler` or `sakkara-scheduler`,
-- Kueue:
- - `batch/job` integration is disabled,
- - `manageJobsWithoutQueueName` is enabled and configured via `managedJobsNamespaceSelector` to be
- scoped to only namespaces that are labeled with `mlbatch-team-namespace=true`.
- - `waitForPodsReady` is disabled,
- - `LendingLimit` feature gate is enabled,
-{{- if .FAIRSHARE }}
- - `fairSharing` is enabled,
-{{- end }}
- - `enableClusterQueueResources` metrics is enabled,
-- AppWrapper operator:
- - `userRBACAdmissionCheck` is disabled,
- - `schedulerName` is set to `scheduler-plugins-scheduler` or `sakkara-scheduler`,
- - `queueName` is set to `default-queue`,
-- pod priorities, resource requests and limits have been adjusted.
-
-{{- end }}
-
-## Autopilot
-
-Helm charts values and how-to for customization can be found [in the official documentation](https://github.com/IBM/autopilot/blob/main/helm-charts/autopilot/README.md). As-is, Autopilot will run on GPU nodes.
-
-- Add the Autopilot Helm repository
-
-```bash
-helm repo add autopilot https://ibm.github.io/autopilot/
-helm repo update
-```
-
-- Install the chart (the command is idempotent). The config file customizes the Helm values and is optional.
-
-```bash
-helm upgrade autopilot autopilot/autopilot --install --namespace=autopilot --create-namespace -f your-config.yml
-```
-
-### Enabling Prometheus metrics
-
-{{ if .RHOAI -}}
-After completing the installation, manually label the namespace to enable metrics to be scraped by Prometheus with the following command:
-
-```bash
-{{ .KUBECTL }} label ns autopilot openshift.io/cluster-monitoring=true
-```
-
-The `ServiceMonitor` labeling is not required.
-{{- else -}}
-The `ServiceMonitor` object is what enables Prometheus to scrape the metrics produced by Autopilot.
-For Prometheus to discover it, Autopilot's `ServiceMonitor` must be labeled with the name of your Prometheus release. This is usually `prometheus`, which is the default set in the Autopilot release.
-If that is not the case in your cluster, the correct release label can be found on the `ServiceMonitor` of Prometheus itself, or in the name of the Prometheus helm chart.
-Then label Autopilot's `ServiceMonitor` with the following command, filling in the release name after `release=`:
-
-```bash
-{{ .KUBECTL }} label servicemonitors.monitoring.coreos.com -n autopilot autopilot-metrics-monitor release= --overwrite
-```
-{{- end }}
-
-## Kueue Configuration
-
-Create Kueue's default flavor:
-```sh
-{{ .KUBECTL }} apply -f setup.{{ .VERSION }}/default-flavor.yaml
-```
-
-## Cluster Role
-
-Create `mlbatch-edit` role:
-```sh
-{{ .KUBECTL }} apply -f setup.{{ .VERSION }}/mlbatch-edit-role.yaml
-```
-
-## Slack Cluster Queue
-
-Create the designated slack `ClusterQueue`, which will be used to automate
-minor adjustments to cluster capacity caused by node failures and
-scheduler maintenance.
-```sh
-{{ .KUBECTL }} apply -f- << EOF
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: ClusterQueue
-metadata:
- name: slack-cluster-queue
-spec:
- namespaceSelector: {}
- cohort: default-cohort
- preemption:
- withinClusterQueue: LowerOrNewerEqualPriority
- reclaimWithinCohort: Any
- borrowWithinCohort:
- policy: Never
- resourceGroups:
- - coveredResources: ["cpu", "memory", "nvidia.com/gpu", "nvidia.com/roce_gdr", "pods"]
- flavors:
- - name: default-flavor
- resources:
- - name: "cpu"
- nominalQuota: 8000m
- - name: "memory"
- nominalQuota: 128Gi
- - name: "nvidia.com/gpu"
- nominalQuota: 8
- - name: "nvidia.com/roce_gdr"
- nominalQuota: 1
- - name: "pods"
- nominalQuota: 100
-EOF
-```
-Edit the above quantities to adjust the quota to the desired
-values. Pod counts are optional and can be omitted from the list of
-covered resources. The `lendingLimit` for each resource will be
-dynamically adjusted by the MLBatch system to reflect reduced cluster
-capacity. See [QUOTA_MAINTENANCE.md](../QUOTA_MAINTENANCE.md) for a
-detailed discussion of the role of the slack `ClusterQueue`.
diff --git a/setup.tmpl/Kubernetes.yaml b/setup.tmpl/Kubernetes.yaml
deleted file mode 100644
index d498bdb..0000000
--- a/setup.tmpl/Kubernetes.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-# Values for Kubernetes v1.29+
-
-RHOAI: false
-VERSION: k8s
-KUBECTL: kubectl
-FAIRSHARE: true
diff --git a/setup.tmpl/Makefile b/setup.tmpl/Makefile
deleted file mode 100644
index a7fe221..0000000
--- a/setup.tmpl/Makefile
+++ /dev/null
@@ -1,36 +0,0 @@
-.PHONY: all
-all: docs
-
-##@ General
-
-# The help target prints out all targets with their descriptions organized
-# beneath their categories. The categories are represented by '##@' and the
-# target descriptions by '##'. The awk command is responsible for reading the
-# entire set of makefiles included in this invocation, looking for lines of the
-# file as xyz: ## something, and then pretty-format the target and help. Then,
-# if there's a line with ##@ something, that gets pretty-printed as a category.
-# More info on the usage of ANSI control characters for terminal formatting:
-# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters
-# More info on the awk command:
-# http://linuxcommand.org/lc3_adv_awk.php
-
-.PHONY: help
-help: ## Display this help.
- @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
-
-
-##@ Generate Documentation
-docs: gotmpl
- ../tools/gotmpl/gotmpl -input ./CLUSTER-SETUP.md.tmpl -output ../setup.RHOAI-v2.16/CLUSTER-SETUP.md -values RHOAI-v2.16.yaml
- ../tools/gotmpl/gotmpl -input ./TEAM-SETUP.md.tmpl -output ../setup.RHOAI-v2.16/TEAM-SETUP.md -values RHOAI-v2.16.yaml
- ../tools/gotmpl/gotmpl -input ./CLUSTER-SETUP.md.tmpl -output ../setup.RHOAI-v2.19/CLUSTER-SETUP.md -values RHOAI-v2.19.yaml
- ../tools/gotmpl/gotmpl -input ./TEAM-SETUP.md.tmpl -output ../setup.RHOAI-v2.19/TEAM-SETUP.md -values RHOAI-v2.19.yaml
- ../tools/gotmpl/gotmpl -input ./CLUSTER-SETUP.md.tmpl -output ../setup.k8s/CLUSTER-SETUP.md -values Kubernetes.yaml
- ../tools/gotmpl/gotmpl -input ./TEAM-SETUP.md.tmpl -output ../setup.k8s/TEAM-SETUP.md -values Kubernetes.yaml
-
-
-##@ Dependencies
-
-.PHONY: gotmpl
-gotmpl: ## Build gotmpl tool
- cd ../tools/gotmpl && go build ./...
diff --git a/setup.tmpl/RHOAI-v2.16.yaml b/setup.tmpl/RHOAI-v2.16.yaml
deleted file mode 100644
index 17cff67..0000000
--- a/setup.tmpl/RHOAI-v2.16.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-# Values for RHOAI 2.16
-
-RHOAI: true
-VERSION: RHOAI-v2.16
-KUBECTL: oc
-FAIRSHARE: true
diff --git a/setup.tmpl/RHOAI-v2.17.yaml b/setup.tmpl/RHOAI-v2.17.yaml
deleted file mode 100644
index c243c3c..0000000
--- a/setup.tmpl/RHOAI-v2.17.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-# Values for RHOAI 2.17
-
-RHOAI: true
-VERSION: RHOAI-v2.17
-KUBECTL: oc
-FAIRSHARE: true
diff --git a/setup.tmpl/RHOAI-v2.19.yaml b/setup.tmpl/RHOAI-v2.19.yaml
deleted file mode 100644
index 0b54073..0000000
--- a/setup.tmpl/RHOAI-v2.19.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-# Values for RHOAI 2.19
-
-RHOAI: true
-VERSION: RHOAI-v2.19
-KUBECTL: oc
-FAIRSHARE: true
diff --git a/setup.tmpl/TEAM-SETUP.md.tmpl b/setup.tmpl/TEAM-SETUP.md.tmpl
deleted file mode 100644
index 59476b0..0000000
--- a/setup.tmpl/TEAM-SETUP.md.tmpl
+++ /dev/null
@@ -1,126 +0,0 @@
-# Team Setup
-
-A *team* in MLBatch is a group of users that share a resource quota.
-
-Before setting up your teams and quotas, please read [QUOTA_MAINTENANCE.md](../QUOTA_MAINTENANCE.md)
-for a discussion of our recommended best practices.
-
-{{ if .RHOAI }}
-Setting up a new team requires the cluster admin to create a project,
-a user group, a quota, a queue, and the required role bindings as described below.
-
-Create project:
-```sh
-{{ .KUBECTL }} new-project team1
-```
-Create user group:
-```sh
-{{ .KUBECTL }} adm groups new team1-edit-group
-```
-Add users to group for example:
-```sh
-{{ .KUBECTL }} adm groups add-users team1-edit-group user1
-```
-Bind cluster role to group in namespace:
-```sh
-{{ .KUBECTL }} adm policy add-role-to-group mlbatch-edit team1-edit-group --role-namespace="" --namespace team1
-```
-{{- else -}}
-Setting up a new team requires the cluster admin to create a namespace,
-a quota, a queue, and the required role bindings as described below.
-
-Create and label the namespace:
-```sh
-{{ .KUBECTL }} create namespace team1
-{{ .KUBECTL }} label namespace team1 'mlbatch-team-namespace=true'
-```
-
-For each user on the team, create a RoleBinding:
-```sh
-{{ .KUBECTL }} -n team1 apply -f- << EOF
-kind: RoleBinding
-apiVersion: rbac.authorization.k8s.io/v1
-metadata:
- name: user-one
-subjects:
- - kind: User
- apiGroup: rbac.authorization.k8s.io
- name: user-one
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: mlbatch-edit
-EOF
-```
-{{- end }}
-
-Specify the intended quota for the namespace by creating a `ClusterQueue`:
-```sh
-{{ .KUBECTL }} apply -f- << EOF
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: ClusterQueue
-metadata:
- name: team1-cluster-queue
-spec:
- namespaceSelector: {}
- cohort: default-cohort
- preemption:
- withinClusterQueue: LowerOrNewerEqualPriority
- reclaimWithinCohort: Any
- borrowWithinCohort:
- policy: Never
- resourceGroups:
- - coveredResources: ["cpu", "memory", "nvidia.com/gpu", "nvidia.com/roce_gdr", "pods"]
- flavors:
- - name: default-flavor
- resources:
- - name: "cpu"
- nominalQuota: 8000m
- # borrowingLimit: 0
- # lendingLimit: 0
- - name: "memory"
- nominalQuota: 128Gi
- # borrowingLimit: 0
- # lendingLimit: 0
- - name: "nvidia.com/gpu"
- nominalQuota: 16
- # borrowingLimit: 0
- # lendingLimit: 0
- - name: "nvidia.com/roce_gdr"
- nominalQuota: 4
- # borrowingLimit: 0
- # lendingLimit: 0
- - name: "pods"
- nominalQuota: 100
- # borrowingLimit: 0
- # lendingLimit: 0
-EOF
-```
-Edit the above quantities to adjust the quota to the desired values. Pod counts
-are optional and can be omitted from the list of covered resources.
-
-Uncomment all `borrowingLimit` lines to prevent this namespace from borrowing
-quota from other namespaces. Uncomment all `lendingLimit` lines to prevent other
-namespaces from borrowing quota from this namespace.
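-
-As an illustrative sketch, a fully isolated namespace would therefore uncomment
-both limits on every covered resource, for example (quantities are placeholders):
-```yaml
-      - name: "nvidia.com/gpu"
-        nominalQuota: 16
-        borrowingLimit: 0
-        lendingLimit: 0
-```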
-
-Create a `LocalQueue` to bind the `ClusterQueue` to the namespace:
-```sh
-{{ .KUBECTL }} apply -n team1 -f- << EOF
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: LocalQueue
-metadata:
- name: default-queue
-spec:
- clusterQueue: team1-cluster-queue
-EOF
-```
-We recommend naming the local queue `default-queue` as `AppWrappers` will
-default to this queue name.
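-
-For reference, this is the queue-name label such a workload ends up with,
-whether set explicitly or filled in by the AppWrapper defaulting (metadata
-sketch only; the workload name is illustrative):
-```yaml
-metadata:
-  name: sample-appwrapper
-  labels:
-    kueue.x-k8s.io/queue-name: default-queue
-```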
-
diff --git a/tools/appwrapper-packager/README.md b/tools/appwrapper-packager/README.md
deleted file mode 100644
index 02fce9e..0000000
--- a/tools/appwrapper-packager/README.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# AppWrapper Packager
-
-The Python script in this directory takes as input a YAML file
-containing one or more Kubernetes resources and generates
-an output YAML file with an AppWrapper containing the input
-resources.
-
-Example invocation:
-```sh
-./awpack.py -i input.yaml -o aw.yaml -n my-appwrapper
-```
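-
-As a sketch of the result, an input file with two resources (separated by
-`---`) yields an AppWrapper of the following shape, with each input document
-nested under its own component (resource bodies elided here):
-```yaml
-apiVersion: workload.codeflare.dev/v1beta2
-kind: AppWrapper
-metadata:
-  name: my-appwrapper
-  labels:
-    kueue.x-k8s.io/queue-name: default-queue
-spec:
-  components:
-  - template:
-      # first input resource
-  - template:
-      # second input resource
-```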
-
-Usage information:
-```sh
-usage: awpack.py [-h] -i INPUT [-n NAME] [-o OUTPUT]
-
-Wrap Resources in an AppWrapper
-
-options:
- -h, --help show this help message and exit
- -i INPUT, --input INPUT
- input YAML file
- -n NAME, --name NAME name of AppWrapper
- -o OUTPUT, --output OUTPUT
- output file
-```
diff --git a/tools/appwrapper-packager/awpack.py b/tools/appwrapper-packager/awpack.py
deleted file mode 100755
index 0aaea4d..0000000
--- a/tools/appwrapper-packager/awpack.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/usr/bin/env python
-
-import argparse
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(
- description="Wrap Resources in an AppWrapper"
- )
- parser.add_argument("-i", "--input", type=str, help="input YAML file", required=True)
- parser.add_argument("-n", "--name", type=str, help="name of AppWrapper", default="sample-appwrapper")
- parser.add_argument("-o", "--output", type=str, help="output file", default="aw.yaml")
- args = parser.parse_args()
-
- new_object = True
-
- with open(args.output, mode="w") as output_file:
- with open(args.input) as input_file:
- output_file.write("apiVersion: workload.codeflare.dev/v1beta2\n")
- output_file.write("kind: AppWrapper\n")
- output_file.write("metadata:\n")
- output_file.write(f" name: {args.name}\n")
- output_file.write(" labels:\n")
- output_file.write(" kueue.x-k8s.io/queue-name: default-queue\n")
- output_file.write("spec:\n")
- output_file.write(" components:\n")
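-            # Copy the input documents, wrapping each document (separated by
-            # "---") in its own "- template:" component of the AppWrapper spec.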
- while True:
- line = input_file.readline()
- if not line:
- break
- if line.startswith("---"):
- new_object = True
- continue
- if line == "\n":
- continue
- if new_object:
- output_file.write(" - template:\n")
- new_object = False
- output_file.write(" "+line)
diff --git a/tools/cluster-checker/README.md b/tools/cluster-checker/README.md
deleted file mode 100644
index 4527fd1..0000000
--- a/tools/cluster-checker/README.md
+++ /dev/null
@@ -1,37 +0,0 @@
-# Cluster Checker
-
-The tool in this directory produces a summary view of GPU quotas and utilization
-on the cluster. It also diagnoses the state of the cluster, looking for common
-issues.
-
-The tool is implemented in JavaScript and is intended to run with Node.js.
-
-Install [Node.js](https://nodejs.org/), which includes the npm package manager.
-
-Install dependencies with:
-```sh
-npm install
-```
-
-Run the tool against the current Kubernetes context with:
-```sh
-node checker.js
-```
-
-Sample output:
-```
-CLUSTER QUEUE GPU QUOTA GPU USAGE ADMITTED WORKLOADS PENDING WORKLOADS
-team1-cluster-queue 8 16 1 0
-team2-cluster-queue 8 4 4 0
-
-Total GPU count in cluster: 24
-Unschedulable GPU count: - 0
-Schedulable GPU count: = 24
-
-Nominal GPU quota: 16
-Maximum slack GPU quota: + 8
-Slack GPU quota adjustment: - 0
-Current GPU quota: = 24
-
-GPU usage by admitted workloads: 20
-Borrowed GPU count: 8
-
-WARNING: workload "default/pytorchjob-job-e6381" refers to a non-existent local queue "test-queue"
-```
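-
-The checker relies on the Kubernetes client's default kubeconfig loading rules,
-so it can be pointed at another cluster by switching contexts or, for example,
-by overriding the kubeconfig (the path below is a placeholder):
-```sh
-KUBECONFIG=/path/to/other/kubeconfig node checker.js
-```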
diff --git a/tools/cluster-checker/checker.js b/tools/cluster-checker/checker.js
deleted file mode 100644
index 85ac6cf..0000000
--- a/tools/cluster-checker/checker.js
+++ /dev/null
@@ -1,442 +0,0 @@
-'use strict'
-
-const k8s = require('@kubernetes/client-node')
-const k8srp = require('kubernetes-resource-parser')
-
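-// Reference per-node capacities used to sanity-check container resource
-// requests and request-to-GPU ratios against what a single node can provide.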
-const nodeResources = {
- 'nvidia.com/gpu': 8,
- 'nvidia.com/roce_gdr': 2,
- cpu: 80,
- memory: '1100Gi'
-}
-
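-// Thin wrapper around the Kubernetes API clients, exposing only the list and
-// read operations used by the checks below.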
-class Client {
- constructor () {
- const config = new k8s.KubeConfig()
- config.loadFromDefault()
- config.getCurrentCluster().skipTLSVerify = true
- this.core = config.makeApiClient(k8s.CoreV1Api)
- this.custom = config.makeApiClient(k8s.CustomObjectsApi)
- }
-
- async nodes () {
- const res = await this.core.listNode()
- return res.body.items
- }
-
- async namespaces () {
- const res = await this.core.listNamespace()
- return res.body.items
- }
-
- async pods (namespace) {
- const res = await this.core.listNamespacedPod(namespace)
- return res.body.items
- }
-
- async readConfigMap (name, namespace) {
- const res = await this.core.readNamespacedConfigMap(name, namespace)
- return res.body
- }
-
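-  // Read the AppWrapper operator configuration, trying the known RHOAI and
-  // MLBatch locations in turn; returns an empty config if none can be read.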
- async readOperatorConfig () {
- const options = [
- { ns: 'redhat-ods-applications', cm: 'codeflare-operator-config', key: 'config.yaml', f: cm => cm.appwrapper?.Config },
- { ns: 'mlbatch-system', cm: 'appwrapper-operator-config', key: 'config.yaml', f: cm => cm.appwrapper },
- { ns: 'appwrapper-system', cm: 'appwrapper-operator-config', key: 'config.yaml', f: cm => cm.appwrapper }
- ]
- for (const opt of options) {
- try {
- const configMap = await this.readConfigMap(opt.cm, opt.ns)
- const cm = k8s.loadYaml(configMap.data[opt.key])
- return opt.f(cm)
- } catch (error) {
- }
- }
- console.log('WARNING: Failed to read operator config')
- return {}
- }
-
- async clusterQueues () {
- const res = await this.custom.listClusterCustomObject(
- 'kueue.x-k8s.io',
- 'v1beta1',
- 'clusterqueues')
- return res.body.items
- }
-
- async localQueues (namespace) {
- const res = await this.custom.listNamespacedCustomObject(
- 'kueue.x-k8s.io',
- 'v1beta1',
- namespace,
- 'localqueues')
- return res.body.items
- }
-
- async workloads (namespace) {
- const res = await this.custom.listNamespacedCustomObject(
- 'kueue.x-k8s.io',
- 'v1beta1',
- namespace,
- 'workloads')
- return res.body.items
- }
-}
-
-// pad value with spaces to the left
-function pad (v, n) {
- return String(v ?? '').padStart(n)
-}
-
-// format and print table
-function printTable (table, kind, ...columns) {
- const widths = { name: kind.length } // column widths
- const names = Object.keys(table).sort() // object names
-
- // compute column widths
- for (const name of names) {
- widths.name = Math.max(widths.name, name.length)
- for (const column of columns) {
-      widths[column[1]] = Math.max(widths[column[1]] ?? column[0].length, String(table[name][column[1]] ?? '').length)
- }
- }
-
- // print table header
- let header = kind.toUpperCase().padEnd(widths.name, ' ')
- for (const column of columns) {
- header += ' ' + pad(column[0].toUpperCase(), widths[column[1]])
- }
- console.log(header)
-
- // print table rows
- for (const name of names) {
- let row = name.padEnd(widths.name, ' ')
- for (const column of columns) {
- row += ' ' + pad(table[name][column[1]], widths[column[1]])
- }
- console.log(row)
- }
-}
-
-// return the number of GPUs reserved by the pod
-function reservation (pod) {
- if (pod.status?.phase === 'Succeeded' || pod.status?.phase === 'Failed') {
- return 0 // pod has already completed
- }
- let scheduled = false
- for (const condition of pod.status?.conditions ?? []) {
- if (condition.type === 'PodScheduled') {
- if (condition.status === 'True') {
- scheduled = true
- }
- break // PodScheduled condition may only appear once
- }
- }
- if (!scheduled) {
- return 0 // pod has not been scheduled yet
- }
- let gpus = 0
- // compute sum of container gpu limits
- for (const container of pod.spec.containers) {
- gpus += parseInt(container.resources?.limits?.['nvidia.com/gpu'] ?? '0')
- }
- // compute max with init container gpu limits
- for (const container of pod.spec.initContainers ?? []) {
- gpus = Math.max(gpus, parseInt(container.resources?.limits?.['nvidia.com/gpu'] ?? '0'))
- }
- return gpus
-}
-
-// check container resource requests against node_resources
-function checkContainerResources (namespace, workload, workloadReplicas, container) {
- // selectively merge limits into requests
- const resources = {}
- for (const k in container.resources?.requests ?? []) {
- resources[k] = container.resources.requests[k]
- }
- for (const k in container.resources?.limits ?? []) {
- if (!(k in resources)) {
- resources[k] = container.resources.limits[k]
- }
- }
-
- const gpus = parseInt(resources['nvidia.com/gpu'] ?? '0')
- const gdr = parseInt(resources['nvidia.com/roce_gdr'] ?? '0')
- const cpus = k8srp.cpuParser(resources.cpu ?? '0')
- const mem = k8srp.memoryParser(resources.memory ?? '0')
-
- // warn if the resource requests cannot be satisfied by a Node
- if (gpus > nodeResources['nvidia.com/gpu']) {
- console.log(`WARNING: workload "${namespace.metadata.name}/${workload.metadata.name}" has a container requesting "${gpus} GPUs"`)
- }
-  if (gdr > nodeResources['nvidia.com/roce_gdr']) {
-    console.log(`WARNING: workload "${namespace.metadata.name}/${workload.metadata.name}" has a container requesting ${gdr} roce_gdr interfaces`)
- }
- if (cpus > nodeResources.cpu) {
- console.log(`WARNING: workload "${namespace.metadata.name}/${workload.metadata.name}" has a container requesting "${cpus} CPUs"`)
- }
- if (mem > k8srp.memoryParser(nodeResources.memory)) {
- console.log(`WARNING: workload "${namespace.metadata.name}/${workload.metadata.name}" has a container requesting ${resources.memory} memory`)
- }
-
- // warn if the resource:GPU ratio is not proportional to Node resources
- if (gdr > 0 && ((gpus === 0) || (gpus / gdr < nodeResources['nvidia.com/gpu'] / nodeResources['nvidia.com/roce_gdr']))) {
- console.log(`WARNING: workload "${namespace.metadata.name}/${workload.metadata.name}" has a container requesting ${gdr} roce_gdr but only ${gpus} GPUs`)
- }
- if (gpus > 0 && (cpus > 0) && (cpus / gpus > nodeResources.cpu / nodeResources['nvidia.com/gpu'])) {
- console.log(`WARNING: workload "${namespace.metadata.name}/${workload.metadata.name}" has a container requesting ${cpus} cpus but only ${gpus} GPUs`)
- }
- if (gpus > 0 && (mem > 0) && (mem / gpus > k8srp.memoryParser(nodeResources.memory) / nodeResources['nvidia.com/gpu'])) {
- console.log(`WARNING: workload "${namespace.metadata.name}/${workload.metadata.name}" has a container requesting ${resources.memory} memory but only ${gpus} GPUs`)
- }
-
- // warn if other resource constraints are violated
- if (gdr > 0 && workloadReplicas < 2) {
- console.log(`WARNING: workload "${namespace.metadata.name}/${workload.metadata.name}" is a single pod workload that is requesting ${gdr} roce_gdr`)
- }
-}
-
-// check user namespace
-async function checkUserNamespace (client, namespace, queues) {
- const workloads = await client.workloads(namespace.metadata.name)
-
- for (const workload of workloads) {
- // report invalid queue names
- const queueName = workload.spec.queueName
- if (queueName) {
- if (!queues.find(queue => queue.metadata.name === queueName)) {
- console.log(`WARNING: workload "${namespace.metadata.name}/${workload.metadata.name}" refers to a non-existent local queue "${queueName}"`)
- }
- } else {
- console.log(`WARNING: workload "${namespace.metadata.name}/${workload.metadata.name}" is missing a local queue name`)
- }
-
- // report high-priority workloads
- if (workload.spec.priorityClassName !== 'default-priority' && workload.spec.priorityClassName !== 'low-priority') {
- console.log(`NOTE: workload "${namespace.metadata.name}/${workload.metadata.name}" has priority "${workload.spec.priorityClassName}"`)
- }
-
- // report unusual conditions
- const conditions = {}
- for (const condition of workload.status?.conditions ?? []) {
- conditions[condition.type] = condition.status
- }
- if (conditions.Finished !== 'True') {
- if (conditions.Admitted === 'True' && conditions.PodsReady === 'False') {
- console.log(`WARNING: workload "${namespace.metadata.name}/${workload.metadata.name}" has conditions Admitted=True and PodsReady=False`)
- }
- if (conditions.Evicted === 'True') {
- console.log(`WARNING: workload "${namespace.metadata.name}/${workload.metadata.name}" has condition Evicted=True`)
- }
- }
-
- // report misconfigured resource requests
- let replicas = 0
-    for (const podSet of workload.spec?.podSets ?? []) {
-      replicas += podSet.count ?? 0
-    }
-    for (const podSet of workload.spec?.podSets ?? []) {
- for (const ic of podSet.template?.spec?.initContainers ?? []) {
- checkContainerResources(namespace, workload, replicas, ic)
- }
- for (const c of podSet.template?.spec?.containers ?? []) {
- checkContainerResources(namespace, workload, replicas, c)
- }
- }
- }
-
- // report GPU pods using default scheduler
- const pods = await client.pods(namespace.metadata.name)
- for (const pod of pods) {
- if (pod.spec.schedulerName === 'default-scheduler' && reservation(pod) > 0) {
- console.log(`WARNING: pod "${namespace.metadata.name}/${pod.metadata.name}" is using default-scheduler`)
- }
- }
-}
-
-// check system namespace
-async function checkSystemNamespace (client, namespace, nodes) {
- const pods = await client.pods(namespace.metadata.name)
-
- for (const pod of pods) {
- // report GPU occupancy
- const gpus = reservation(pod)
- if (gpus) {
- const node = nodes.find(node => node.metadata.name === pod.spec.nodeName)
-      console.log(`WARNING: pod "${namespace.metadata.name}/${pod.metadata.name}" occupies ${gpus} GPU(s)` +
-        ` on node "${pod.spec.nodeName}" with GPU taints noExecute=${node?.noExecute} and noSchedule=${node?.noSchedule}`)
- }
- }
-}
-
-async function main () {
- try {
- // initialize kubernetes client
- const client = new Client()
-
- let clusterGPUs = 0 // cluster capacity
- let noScheduleGPUs = 0 // no-schedule GPUs
- let noExecuteGPUs = 0 // no-execute GPUs
- let usedGPUs = 0 // GPU usage by admitted workloads
- let borrowedGPUs = 0 // GPU borrowed from the cohort
- let quotaGPUs = 0 // nominal GPU quota (excluding slack queue)
- let limitGPUs = 0 // lending limit on slack queue
- let slackGPUs = 0 // nominal GPU quota on slack queue
-
- const config = await client.readOperatorConfig()
- const taints = config.autopilot?.resourceTaints?.['nvidia.com/gpu']
- const slackQueueName = config.slackQueueName
-
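-    // tracks whether any node warning was printed so that a separating blank
-    // line can be emitted before the cluster queue table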
- let newline = false
-
- // compute GPU counts
- const nodes = await client.nodes()
- for (const node of nodes) {
- const gpus = parseInt(node.status.capacity['nvidia.com/gpu'] ?? '0')
- if (gpus > 0) {
- clusterGPUs += gpus
- node.noSchedule = false
- node.noExecute = false
- for (const taint of taints ?? []) {
- if (node.metadata.labels?.[taint.key] === taint.value) {
- if (taint.effect === 'NoExecute') {
- console.log(`WARNING: node "${node.metadata.name}" has label "${taint.key}"="${taint.value}" with effect "${taint.effect}"`)
- newline = true
- node.noExecute = true
- } else if (taint.effect === 'NoSchedule') {
- console.log(`WARNING: node "${node.metadata.name}" has label "${taint.key}"="${taint.value}" with effect "${taint.effect}"`)
- newline = true
- node.noSchedule = true
- }
- }
- }
- for (const taint of node.spec.taints ?? []) {
- if (taint.effect === 'NoExecute') {
- console.log(`WARNING: node "${node.metadata.name}" has taint "${taint.key}" with effect "${taint.effect}"`)
- newline = true
- node.noExecute = true
- } else if (taint.effect === 'NoSchedule') {
- console.log(`WARNING: node "${node.metadata.name}" has taint "${taint.key}" with effect "${taint.effect}"`)
- newline = true
- node.noSchedule = true
- }
- }
- if (node.noExecute) {
- noExecuteGPUs += gpus
- } else if (node.noSchedule) { // no double counting
- noScheduleGPUs += gpus
- }
- }
- }
-
- if (newline) {
- console.log()
- }
-
- // collect cluster queue metrics
- const clusterQueues = await client.clusterQueues()
- const queues = {}
- for (const clusterQueue of clusterQueues) {
- const queue = {
- quota: 0,
- usage: 0,
- borrowed: 0,
- lendingLimit: 0,
- admitted: clusterQueue.status?.admittedWorkloads ?? 0,
- pending: clusterQueue.status?.pendingWorkloads ?? 0
- }
- for (const resourceGroup of clusterQueue.spec.resourceGroups) {
- if (resourceGroup.coveredResources.includes('nvidia.com/gpu')) {
- for (const flavor of resourceGroup.flavors) {
- for (const resource of flavor.resources) {
- if (resource.name === 'nvidia.com/gpu') {
- queue.quota += parseInt(resource.nominalQuota ?? '0')
- // lending limit is nominal quota if not set
- queue.lendingLimit += parseInt(resource.lendingLimit ?? resource.nominalQuota ?? '0')
- break // resource may only occur once in flavor
- }
- }
- }
- break // resource may only belong to one resource group
- }
- }
- for (const flavor of clusterQueue.status?.flavorsUsage ?? []) {
- for (const resource of flavor.resources) {
- if (resource.name === 'nvidia.com/gpu') {
- queue.usage += parseInt(resource.total ?? '0')
- queue.borrowed += parseInt(resource.borrowed ?? '0')
- break // resource may only occur once in flavor
- }
- }
- }
- usedGPUs += queue.usage
- borrowedGPUs += queue.borrowed
- if (clusterQueue.metadata.name === slackQueueName) {
- slackGPUs = queue.quota
- limitGPUs = queue.lendingLimit
- // do not include slack queue in table
- } else {
- quotaGPUs += queue.quota
- queues[clusterQueue.metadata.name] = queue
- }
- }
-
- // print cluster queue table
- printTable(queues, 'cluster queue', ['gpu quota', 'quota'], ['gpu usage', 'usage'],
- ['admitted workloads', 'admitted'], ['pending workloads', 'pending'])
- console.log()
-
- // print summary results
- const width = Math.max(String(clusterGPUs).length, String(quotaGPUs).length)
- console.log(`Total GPU count in cluster: ${pad(clusterGPUs, width)}`)
- console.log(`Unschedulable GPU count: - ${pad(noExecuteGPUs + noScheduleGPUs, width)}`)
- console.log(`Schedulable GPU count: = ${pad(clusterGPUs - noExecuteGPUs - noScheduleGPUs, width)}`)
- console.log()
- console.log(`Nominal GPU quota: ${pad(quotaGPUs, width)}`)
- console.log(`Maximum slack GPU quota: + ${pad(slackGPUs, width)}`)
- console.log(`Slack GPU quota adjustment: - ${pad(slackGPUs - limitGPUs, width)}`)
- console.log(`Current GPU quota: = ${pad(quotaGPUs + limitGPUs, width)}`)
- console.log()
- console.log(`GPU usage by admitted workloads: ${pad(usedGPUs, width)}`)
- console.log(`Borrowed GPU count: ${pad(borrowedGPUs, width)}`)
- console.log()
-
- if (quotaGPUs > clusterGPUs - noExecuteGPUs - noScheduleGPUs) {
- console.log('WARNING: nominal GPU quota is greater than schedulable GPU count')
- }
-
- if (quotaGPUs + slackGPUs < clusterGPUs) {
- console.log('WARNING: maximum GPU quota is lower than total GPU count')
- }
-
- if (quotaGPUs + slackGPUs > clusterGPUs) {
- console.log('WARNING: maximum GPU quota is greater than total GPU count')
- }
-
- // check all accessible namespaces
- const namespaces = await client.namespaces()
- for (const namespace of namespaces) {
- if (namespace.metadata.name.startsWith('openshift-')) {
- continue // skip openshift namespaces
- }
-
- let localQueues
- try {
- localQueues = await client.localQueues(namespace.metadata.name)
- } catch (err) {
- continue // skip inaccessible namespaces
- }
-
- if (localQueues.length === 0) {
- await checkSystemNamespace(client, namespace, nodes)
- } else {
- await checkUserNamespace(client, namespace, localQueues)
- }
- }
- } catch (err) {
- console.error(err)
- }
-}
-
-main()
diff --git a/tools/cluster-checker/package-lock.json b/tools/cluster-checker/package-lock.json
deleted file mode 100644
index 84aa97a..0000000
--- a/tools/cluster-checker/package-lock.json
+++ /dev/null
@@ -1,4502 +0,0 @@
-{
- "name": "cluster-checker",
- "lockfileVersion": 3,
- "requires": true,
- "packages": {
- "": {
- "dependencies": {
- "@kubernetes/client-node": "^0.21.0",
- "kubernetes-resource-parser": "0.1.0"
- },
- "devDependencies": {
- "standard": "^17.1.2"
- }
- },
- "node_modules/@eslint-community/eslint-utils": {
- "version": "4.4.0",
- "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.4.0.tgz",
- "integrity": "sha512-1/sA4dwrzBAyeUoQ6oxahHKmrZvsnLCg4RfxW3ZFGGmQkSNQPFNLV9CUEFQP1x9EYXHTo5p6xdhZM1Ne9p/AfA==",
- "dev": true,
- "dependencies": {
- "eslint-visitor-keys": "^3.3.0"
- },
- "engines": {
- "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
- },
- "peerDependencies": {
- "eslint": "^6.0.0 || ^7.0.0 || >=8.0.0"
- }
- },
- "node_modules/@eslint-community/regexpp": {
- "version": "4.11.1",
- "resolved": "https://registry.npmjs.org/@eslint-community/regexpp/-/regexpp-4.11.1.tgz",
- "integrity": "sha512-m4DVN9ZqskZoLU5GlWZadwDnYo3vAEydiUayB9widCl9ffWx2IvPnp6n3on5rJmziJSw9Bv+Z3ChDVdMwXCY8Q==",
- "dev": true,
- "engines": {
- "node": "^12.0.0 || ^14.0.0 || >=16.0.0"
- }
- },
- "node_modules/@eslint/eslintrc": {
- "version": "2.1.4",
- "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-2.1.4.tgz",
- "integrity": "sha512-269Z39MS6wVJtsoUl10L60WdkhJVdPG24Q4eZTH3nnF6lpvSShEK3wQjDX9JRWAUPvPh7COouPpU9IrqaZFvtQ==",
- "dev": true,
- "dependencies": {
- "ajv": "^6.12.4",
- "debug": "^4.3.2",
- "espree": "^9.6.0",
- "globals": "^13.19.0",
- "ignore": "^5.2.0",
- "import-fresh": "^3.2.1",
- "js-yaml": "^4.1.0",
- "minimatch": "^3.1.2",
- "strip-json-comments": "^3.1.1"
- },
- "engines": {
- "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/eslint"
- }
- },
- "node_modules/@eslint/eslintrc/node_modules/brace-expansion": {
- "version": "1.1.11",
- "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
- "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
- "dev": true,
- "dependencies": {
- "balanced-match": "^1.0.0",
- "concat-map": "0.0.1"
- }
- },
- "node_modules/@eslint/eslintrc/node_modules/minimatch": {
- "version": "3.1.2",
- "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
- "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
- "dev": true,
- "dependencies": {
- "brace-expansion": "^1.1.7"
- },
- "engines": {
- "node": "*"
- }
- },
- "node_modules/@eslint/js": {
- "version": "8.57.1",
- "resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.57.1.tgz",
- "integrity": "sha512-d9zaMRSTIKDLhctzH12MtXvJKSSUhaHcjV+2Z+GK+EEY7XKpP5yR4x+N3TAcHTcu963nIr+TMcCb4DBCYX1z6Q==",
- "dev": true,
- "engines": {
- "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
- }
- },
- "node_modules/@humanwhocodes/config-array": {
- "version": "0.13.0",
- "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.13.0.tgz",
- "integrity": "sha512-DZLEEqFWQFiyK6h5YIeynKx7JlvCYWL0cImfSRXZ9l4Sg2efkFGTuFf6vzXjK1cq6IYkU+Eg/JizXw+TD2vRNw==",
- "deprecated": "Use @eslint/config-array instead",
- "dev": true,
- "dependencies": {
- "@humanwhocodes/object-schema": "^2.0.3",
- "debug": "^4.3.1",
- "minimatch": "^3.0.5"
- },
- "engines": {
- "node": ">=10.10.0"
- }
- },
- "node_modules/@humanwhocodes/config-array/node_modules/brace-expansion": {
- "version": "1.1.11",
- "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
- "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
- "dev": true,
- "dependencies": {
- "balanced-match": "^1.0.0",
- "concat-map": "0.0.1"
- }
- },
- "node_modules/@humanwhocodes/config-array/node_modules/minimatch": {
- "version": "3.1.2",
- "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
- "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
- "dev": true,
- "dependencies": {
- "brace-expansion": "^1.1.7"
- },
- "engines": {
- "node": "*"
- }
- },
- "node_modules/@humanwhocodes/module-importer": {
- "version": "1.0.1",
- "resolved": "https://registry.npmjs.org/@humanwhocodes/module-importer/-/module-importer-1.0.1.tgz",
- "integrity": "sha512-bxveV4V8v5Yb4ncFTT3rPSgZBOpCkjfK0y4oVVVJwIuDVBRMDXrPyXRL988i5ap9m9bnyEEjWfm5WkBmtffLfA==",
- "dev": true,
- "engines": {
- "node": ">=12.22"
- },
- "funding": {
- "type": "github",
- "url": "https://github.com/sponsors/nzakas"
- }
- },
- "node_modules/@humanwhocodes/object-schema": {
- "version": "2.0.3",
- "resolved": "https://registry.npmjs.org/@humanwhocodes/object-schema/-/object-schema-2.0.3.tgz",
- "integrity": "sha512-93zYdMES/c1D69yZiKDBj0V24vqNzB/koF26KPaagAfd3P/4gUlh3Dys5ogAK+Exi9QyzlD8x/08Zt7wIKcDcA==",
- "deprecated": "Use @eslint/object-schema instead",
- "dev": true
- },
- "node_modules/@isaacs/cliui": {
- "version": "8.0.2",
- "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz",
- "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==",
- "dependencies": {
- "string-width": "^5.1.2",
- "string-width-cjs": "npm:string-width@^4.2.0",
- "strip-ansi": "^7.0.1",
- "strip-ansi-cjs": "npm:strip-ansi@^6.0.1",
- "wrap-ansi": "^8.1.0",
- "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0"
- },
- "engines": {
- "node": ">=12"
- }
- },
- "node_modules/@isaacs/fs-minipass": {
- "version": "4.0.1",
- "resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz",
- "integrity": "sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==",
- "dependencies": {
- "minipass": "^7.0.4"
- },
- "engines": {
- "node": ">=18.0.0"
- }
- },
- "node_modules/@kubernetes/client-node": {
- "version": "0.21.0",
- "resolved": "https://registry.npmjs.org/@kubernetes/client-node/-/client-node-0.21.0.tgz",
- "integrity": "sha512-yYRbgMeyQbvZDHt/ZqsW3m4lRefzhbbJEuj8sVXM+bufKrgmzriA2oq7lWPH/k/LQIicAME9ixPUadTrxIF6dQ==",
- "dependencies": {
- "@types/js-yaml": "^4.0.1",
- "@types/node": "^20.1.1",
- "@types/request": "^2.47.1",
- "@types/ws": "^8.5.3",
- "byline": "^5.0.0",
- "isomorphic-ws": "^5.0.0",
- "js-yaml": "^4.1.0",
- "jsonpath-plus": "^8.0.0",
- "request": "^2.88.0",
- "rfc4648": "^1.3.0",
- "stream-buffers": "^3.0.2",
- "tar": "^7.0.0",
- "tslib": "^2.4.1",
- "ws": "^8.11.0"
- },
- "optionalDependencies": {
- "openid-client": "^5.3.0"
- }
- },
- "node_modules/@nodelib/fs.scandir": {
- "version": "2.1.5",
- "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
- "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==",
- "dev": true,
- "dependencies": {
- "@nodelib/fs.stat": "2.0.5",
- "run-parallel": "^1.1.9"
- },
- "engines": {
- "node": ">= 8"
- }
- },
- "node_modules/@nodelib/fs.stat": {
- "version": "2.0.5",
- "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz",
- "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==",
- "dev": true,
- "engines": {
- "node": ">= 8"
- }
- },
- "node_modules/@nodelib/fs.walk": {
- "version": "1.2.8",
- "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz",
- "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==",
- "dev": true,
- "dependencies": {
- "@nodelib/fs.scandir": "2.1.5",
- "fastq": "^1.6.0"
- },
- "engines": {
- "node": ">= 8"
- }
- },
- "node_modules/@pkgjs/parseargs": {
- "version": "0.11.0",
- "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz",
- "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==",
- "optional": true,
- "engines": {
- "node": ">=14"
- }
- },
- "node_modules/@rtsao/scc": {
- "version": "1.1.0",
- "resolved": "https://registry.npmjs.org/@rtsao/scc/-/scc-1.1.0.tgz",
- "integrity": "sha512-zt6OdqaDoOnJ1ZYsCYGt9YmWzDXl4vQdKTyJev62gFhRGKdx7mcT54V9KIjg+d2wi9EXsPvAPKe7i7WjfVWB8g==",
- "dev": true
- },
- "node_modules/@types/caseless": {
- "version": "0.12.5",
- "resolved": "https://registry.npmjs.org/@types/caseless/-/caseless-0.12.5.tgz",
- "integrity": "sha512-hWtVTC2q7hc7xZ/RLbxapMvDMgUnDvKvMOpKal4DrMyfGBUfB1oKaZlIRr6mJL+If3bAP6sV/QneGzF6tJjZDg=="
- },
- "node_modules/@types/js-yaml": {
- "version": "4.0.9",
- "resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
- "integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="
- },
- "node_modules/@types/json5": {
- "version": "0.0.29",
- "resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz",
- "integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==",
- "dev": true
- },
- "node_modules/@types/node": {
- "version": "20.16.2",
- "resolved": "https://registry.npmjs.org/@types/node/-/node-20.16.2.tgz",
- "integrity": "sha512-91s/n4qUPV/wg8eE9KHYW1kouTfDk2FPGjXbBMfRWP/2vg1rCXNQL1OCabwGs0XSdukuK+MwCDXE30QpSeMUhQ==",
- "dependencies": {
- "undici-types": "~6.19.2"
- }
- },
- "node_modules/@types/request": {
- "version": "2.48.12",
- "resolved": "https://registry.npmjs.org/@types/request/-/request-2.48.12.tgz",
- "integrity": "sha512-G3sY+NpsA9jnwm0ixhAFQSJ3Q9JkpLZpJbI3GMv0mIAT0y3mRabYeINzal5WOChIiaTEGQYlHOKgkaM9EisWHw==",
- "dependencies": {
- "@types/caseless": "*",
- "@types/node": "*",
- "@types/tough-cookie": "*",
- "form-data": "^2.5.0"
- }
- },
- "node_modules/@types/tough-cookie": {
- "version": "4.0.5",
- "resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-4.0.5.tgz",
- "integrity": "sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA=="
- },
- "node_modules/@types/ws": {
- "version": "8.5.12",
- "resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.5.12.tgz",
- "integrity": "sha512-3tPRkv1EtkDpzlgyKyI8pGsGZAGPEaXeu0DOj5DI25Ja91bdAYddYHbADRYVrZMRbfW+1l5YwXVDKohDJNQxkQ==",
- "dependencies": {
- "@types/node": "*"
- }
- },
- "node_modules/@ungap/structured-clone": {
- "version": "1.2.0",
- "resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.2.0.tgz",
- "integrity": "sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ==",
- "dev": true
- },
- "node_modules/acorn": {
- "version": "8.12.1",
- "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.12.1.tgz",
- "integrity": "sha512-tcpGyI9zbizT9JbV6oYE477V6mTlXvvi0T0G3SNIYE2apm/G5huBa1+K89VGeovbg+jycCrfhl3ADxErOuO6Jg==",
- "dev": true,
- "bin": {
- "acorn": "bin/acorn"
- },
- "engines": {
- "node": ">=0.4.0"
- }
- },
- "node_modules/acorn-jsx": {
- "version": "5.3.2",
- "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.2.tgz",
- "integrity": "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==",
- "dev": true,
- "peerDependencies": {
- "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0"
- }
- },
- "node_modules/ajv": {
- "version": "6.12.6",
- "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
- "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==",
- "dependencies": {
- "fast-deep-equal": "^3.1.1",
- "fast-json-stable-stringify": "^2.0.0",
- "json-schema-traverse": "^0.4.1",
- "uri-js": "^4.2.2"
- },
- "funding": {
- "type": "github",
- "url": "https://github.com/sponsors/epoberezkin"
- }
- },
- "node_modules/ansi-regex": {
- "version": "6.0.1",
- "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz",
- "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==",
- "engines": {
- "node": ">=12"
- },
- "funding": {
- "url": "https://github.com/chalk/ansi-regex?sponsor=1"
- }
- },
- "node_modules/ansi-styles": {
- "version": "6.2.1",
- "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz",
- "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==",
- "engines": {
- "node": ">=12"
- },
- "funding": {
- "url": "https://github.com/chalk/ansi-styles?sponsor=1"
- }
- },
- "node_modules/argparse": {
- "version": "2.0.1",
- "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
- "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="
- },
- "node_modules/array-buffer-byte-length": {
- "version": "1.0.1",
- "resolved": "https://registry.npmjs.org/array-buffer-byte-length/-/array-buffer-byte-length-1.0.1.tgz",
- "integrity": "sha512-ahC5W1xgou+KTXix4sAO8Ki12Q+jf4i0+tmk3sC+zgcynshkHxzpXdImBehiUYKKKDwvfFiJl1tZt6ewscS1Mg==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.5",
- "is-array-buffer": "^3.0.4"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/array-includes": {
- "version": "3.1.8",
- "resolved": "https://registry.npmjs.org/array-includes/-/array-includes-3.1.8.tgz",
- "integrity": "sha512-itaWrbYbqpGXkGhZPGUulwnhVf5Hpy1xiCFsGqyIGglbBxmG5vSjxQen3/WGOjPpNEv1RtBLKxbmVXm8HpJStQ==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "define-properties": "^1.2.1",
- "es-abstract": "^1.23.2",
- "es-object-atoms": "^1.0.0",
- "get-intrinsic": "^1.2.4",
- "is-string": "^1.0.7"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/array.prototype.findlast": {
- "version": "1.2.5",
- "resolved": "https://registry.npmjs.org/array.prototype.findlast/-/array.prototype.findlast-1.2.5.tgz",
- "integrity": "sha512-CVvd6FHg1Z3POpBLxO6E6zr+rSKEQ9L6rZHAaY7lLfhKsWYUBBOuMs0e9o24oopj6H+geRCX0YJ+TJLBK2eHyQ==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "define-properties": "^1.2.1",
- "es-abstract": "^1.23.2",
- "es-errors": "^1.3.0",
- "es-object-atoms": "^1.0.0",
- "es-shim-unscopables": "^1.0.2"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/array.prototype.findlastindex": {
- "version": "1.2.5",
- "resolved": "https://registry.npmjs.org/array.prototype.findlastindex/-/array.prototype.findlastindex-1.2.5.tgz",
- "integrity": "sha512-zfETvRFA8o7EiNn++N5f/kaCw221hrpGsDmcpndVupkPzEc1Wuf3VgC0qby1BbHs7f5DVYjgtEU2LLh5bqeGfQ==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "define-properties": "^1.2.1",
- "es-abstract": "^1.23.2",
- "es-errors": "^1.3.0",
- "es-object-atoms": "^1.0.0",
- "es-shim-unscopables": "^1.0.2"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/array.prototype.flat": {
- "version": "1.3.2",
- "resolved": "https://registry.npmjs.org/array.prototype.flat/-/array.prototype.flat-1.3.2.tgz",
- "integrity": "sha512-djYB+Zx2vLewY8RWlNCUdHjDXs2XOgm602S9E7P/UpHgfeHL00cRiIF+IN/G/aUJ7kGPb6yO/ErDI5V2s8iycA==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.2",
- "define-properties": "^1.2.0",
- "es-abstract": "^1.22.1",
- "es-shim-unscopables": "^1.0.0"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/array.prototype.flatmap": {
- "version": "1.3.2",
- "resolved": "https://registry.npmjs.org/array.prototype.flatmap/-/array.prototype.flatmap-1.3.2.tgz",
- "integrity": "sha512-Ewyx0c9PmpcsByhSW4r+9zDU7sGjFc86qf/kKtuSCRdhfbk0SNLLkaT5qvcHnRGgc5NP/ly/y+qkXkqONX54CQ==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.2",
- "define-properties": "^1.2.0",
- "es-abstract": "^1.22.1",
- "es-shim-unscopables": "^1.0.0"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/array.prototype.tosorted": {
- "version": "1.1.4",
- "resolved": "https://registry.npmjs.org/array.prototype.tosorted/-/array.prototype.tosorted-1.1.4.tgz",
- "integrity": "sha512-p6Fx8B7b7ZhL/gmUsAy0D15WhvDccw3mnGNbZpi3pmeJdxtWsj2jEaI4Y6oo3XiHfzuSgPwKc04MYt6KgvC/wA==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "define-properties": "^1.2.1",
- "es-abstract": "^1.23.3",
- "es-errors": "^1.3.0",
- "es-shim-unscopables": "^1.0.2"
- },
- "engines": {
- "node": ">= 0.4"
- }
- },
- "node_modules/arraybuffer.prototype.slice": {
- "version": "1.0.3",
- "resolved": "https://registry.npmjs.org/arraybuffer.prototype.slice/-/arraybuffer.prototype.slice-1.0.3.tgz",
- "integrity": "sha512-bMxMKAjg13EBSVscxTaYA4mRc5t1UAXa2kXiGTNfZ079HIWXEkKmkgFrh/nJqamaLSrXO5H4WFFkPEaLJWbs3A==",
- "dev": true,
- "dependencies": {
- "array-buffer-byte-length": "^1.0.1",
- "call-bind": "^1.0.5",
- "define-properties": "^1.2.1",
- "es-abstract": "^1.22.3",
- "es-errors": "^1.2.1",
- "get-intrinsic": "^1.2.3",
- "is-array-buffer": "^3.0.4",
- "is-shared-array-buffer": "^1.0.2"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/asn1": {
- "version": "0.2.6",
- "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.6.tgz",
- "integrity": "sha512-ix/FxPn0MDjeyJ7i/yoHGFt/EX6LyNbxSEhPPXODPL+KB0VPk86UYfL0lMdy+KCnv+fmvIzySwaK5COwqVbWTQ==",
- "dependencies": {
- "safer-buffer": "~2.1.0"
- }
- },
- "node_modules/assert-plus": {
- "version": "1.0.0",
- "resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-1.0.0.tgz",
- "integrity": "sha512-NfJ4UzBCcQGLDlQq7nHxH+tv3kyZ0hHQqF5BO6J7tNJeP5do1llPr8dZ8zHonfhAu0PHAdMkSo+8o0wxg9lZWw==",
- "engines": {
- "node": ">=0.8"
- }
- },
- "node_modules/asynckit": {
- "version": "0.4.0",
- "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
- "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
- },
- "node_modules/available-typed-arrays": {
- "version": "1.0.7",
- "resolved": "https://registry.npmjs.org/available-typed-arrays/-/available-typed-arrays-1.0.7.tgz",
- "integrity": "sha512-wvUjBtSGN7+7SjNpq/9M2Tg350UZD3q62IFZLbRAR1bSMlCo1ZaeW+BJ+D090e4hIIZLBcTDWe4Mh4jvUDajzQ==",
- "dev": true,
- "dependencies": {
- "possible-typed-array-names": "^1.0.0"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/aws-sign2": {
- "version": "0.7.0",
- "resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.7.0.tgz",
- "integrity": "sha512-08kcGqnYf/YmjoRhfxyu+CLxBjUtHLXLXX/vUfx9l2LYzG3c1m61nrpyFUZI6zeS+Li/wWMMidD9KgrqtGq3mA==",
- "engines": {
- "node": "*"
- }
- },
- "node_modules/aws4": {
- "version": "1.13.2",
- "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.13.2.tgz",
- "integrity": "sha512-lHe62zvbTB5eEABUVi/AwVh0ZKY9rMMDhmm+eeyuuUQbQ3+J+fONVQOZyj+DdrvD4BY33uYniyRJ4UJIaSKAfw=="
- },
- "node_modules/balanced-match": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
- "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="
- },
- "node_modules/bcrypt-pbkdf": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz",
- "integrity": "sha512-qeFIXtP4MSoi6NLqO12WfqARWWuCKi2Rn/9hJLEmtB5yTNr9DqFWkJRCf2qShWzPeAMRnOgCrq0sg/KLv5ES9w==",
- "dependencies": {
- "tweetnacl": "^0.14.3"
- }
- },
- "node_modules/brace-expansion": {
- "version": "2.0.1",
- "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
- "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
- "dependencies": {
- "balanced-match": "^1.0.0"
- }
- },
- "node_modules/builtins": {
- "version": "5.1.0",
- "resolved": "https://registry.npmjs.org/builtins/-/builtins-5.1.0.tgz",
- "integrity": "sha512-SW9lzGTLvWTP1AY8xeAMZimqDrIaSdLQUcVr9DMef51niJ022Ri87SwRRKYm4A6iHfkPaiVUu/Duw2Wc4J7kKg==",
- "dev": true,
- "dependencies": {
- "semver": "^7.0.0"
- }
- },
- "node_modules/builtins/node_modules/semver": {
- "version": "7.6.3",
- "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz",
- "integrity": "sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==",
- "dev": true,
- "bin": {
- "semver": "bin/semver.js"
- },
- "engines": {
- "node": ">=10"
- }
- },
- "node_modules/byline": {
- "version": "5.0.0",
- "resolved": "https://registry.npmjs.org/byline/-/byline-5.0.0.tgz",
- "integrity": "sha512-s6webAy+R4SR8XVuJWt2V2rGvhnrhxN+9S15GNuTK3wKPOXFF6RNc+8ug2XhH+2s4f+uudG4kUVYmYOQWL2g0Q==",
- "engines": {
- "node": ">=0.10.0"
- }
- },
- "node_modules/call-bind": {
- "version": "1.0.7",
- "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.7.tgz",
- "integrity": "sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w==",
- "dev": true,
- "dependencies": {
- "es-define-property": "^1.0.0",
- "es-errors": "^1.3.0",
- "function-bind": "^1.1.2",
- "get-intrinsic": "^1.2.4",
- "set-function-length": "^1.2.1"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/callsites": {
- "version": "3.1.0",
- "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
- "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==",
- "dev": true,
- "engines": {
- "node": ">=6"
- }
- },
- "node_modules/caseless": {
- "version": "0.12.0",
- "resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz",
- "integrity": "sha512-4tYFyifaFfGacoiObjJegolkwSU4xQNGbVgUiNYVUxbQ2x2lUsFvY4hVgVzGiIe6WLOPqycWXA40l+PWsxthUw=="
- },
- "node_modules/chalk": {
- "version": "4.1.2",
- "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz",
- "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==",
- "dev": true,
- "dependencies": {
- "ansi-styles": "^4.1.0",
- "supports-color": "^7.1.0"
- },
- "engines": {
- "node": ">=10"
- },
- "funding": {
- "url": "https://github.com/chalk/chalk?sponsor=1"
- }
- },
- "node_modules/chalk/node_modules/ansi-styles": {
- "version": "4.3.0",
- "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
- "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
- "dev": true,
- "dependencies": {
- "color-convert": "^2.0.1"
- },
- "engines": {
- "node": ">=8"
- },
- "funding": {
- "url": "https://github.com/chalk/ansi-styles?sponsor=1"
- }
- },
- "node_modules/chownr": {
- "version": "3.0.0",
- "resolved": "https://registry.npmjs.org/chownr/-/chownr-3.0.0.tgz",
- "integrity": "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==",
- "engines": {
- "node": ">=18"
- }
- },
- "node_modules/color-convert": {
- "version": "2.0.1",
- "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
- "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==",
- "dependencies": {
- "color-name": "~1.1.4"
- },
- "engines": {
- "node": ">=7.0.0"
- }
- },
- "node_modules/color-name": {
- "version": "1.1.4",
- "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
- "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA=="
- },
- "node_modules/combined-stream": {
- "version": "1.0.8",
- "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
- "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
- "dependencies": {
- "delayed-stream": "~1.0.0"
- },
- "engines": {
- "node": ">= 0.8"
- }
- },
- "node_modules/concat-map": {
- "version": "0.0.1",
- "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
- "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==",
- "dev": true
- },
- "node_modules/core-util-is": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz",
- "integrity": "sha512-3lqz5YjWTYnW6dlDa5TLaTCcShfar1e40rmcJVwCBJC6mWlFuj0eCHIElmG1g5kyuJ/GD+8Wn4FFCcz4gJPfaQ=="
- },
- "node_modules/cross-spawn": {
- "version": "7.0.3",
- "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
- "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
- "dependencies": {
- "path-key": "^3.1.0",
- "shebang-command": "^2.0.0",
- "which": "^2.0.1"
- },
- "engines": {
- "node": ">= 8"
- }
- },
- "node_modules/dashdash": {
- "version": "1.14.1",
- "resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz",
- "integrity": "sha512-jRFi8UDGo6j+odZiEpjazZaWqEal3w/basFjQHQEwVtZJGDpxbH1MeYluwCS8Xq5wmLJooDlMgvVarmWfGM44g==",
- "dependencies": {
- "assert-plus": "^1.0.0"
- },
- "engines": {
- "node": ">=0.10"
- }
- },
- "node_modules/data-view-buffer": {
- "version": "1.0.1",
- "resolved": "https://registry.npmjs.org/data-view-buffer/-/data-view-buffer-1.0.1.tgz",
- "integrity": "sha512-0lht7OugA5x3iJLOWFhWK/5ehONdprk0ISXqVFn/NFrDu+cuc8iADFrGQz5BnRK7LLU3JmkbXSxaqX+/mXYtUA==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.6",
- "es-errors": "^1.3.0",
- "is-data-view": "^1.0.1"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/data-view-byte-length": {
- "version": "1.0.1",
- "resolved": "https://registry.npmjs.org/data-view-byte-length/-/data-view-byte-length-1.0.1.tgz",
- "integrity": "sha512-4J7wRJD3ABAzr8wP+OcIcqq2dlUKp4DVflx++hs5h5ZKydWMI6/D/fAot+yh6g2tHh8fLFTvNOaVN357NvSrOQ==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "es-errors": "^1.3.0",
- "is-data-view": "^1.0.1"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/data-view-byte-offset": {
- "version": "1.0.0",
- "resolved": "https://registry.npmjs.org/data-view-byte-offset/-/data-view-byte-offset-1.0.0.tgz",
- "integrity": "sha512-t/Ygsytq+R995EJ5PZlD4Cu56sWa8InXySaViRzw9apusqsOO2bQP+SbYzAhR0pFKoB+43lYy8rWban9JSuXnA==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.6",
- "es-errors": "^1.3.0",
- "is-data-view": "^1.0.1"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/debug": {
- "version": "4.3.7",
- "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz",
- "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==",
- "dev": true,
- "dependencies": {
- "ms": "^2.1.3"
- },
- "engines": {
- "node": ">=6.0"
- },
- "peerDependenciesMeta": {
- "supports-color": {
- "optional": true
- }
- }
- },
- "node_modules/deep-is": {
- "version": "0.1.4",
- "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz",
- "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==",
- "dev": true
- },
- "node_modules/define-data-property": {
- "version": "1.1.4",
- "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz",
- "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==",
- "dev": true,
- "dependencies": {
- "es-define-property": "^1.0.0",
- "es-errors": "^1.3.0",
- "gopd": "^1.0.1"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/define-properties": {
- "version": "1.2.1",
- "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.2.1.tgz",
- "integrity": "sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg==",
- "dev": true,
- "dependencies": {
- "define-data-property": "^1.0.1",
- "has-property-descriptors": "^1.0.0",
- "object-keys": "^1.1.1"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/delayed-stream": {
- "version": "1.0.0",
- "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
- "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
- "engines": {
- "node": ">=0.4.0"
- }
- },
- "node_modules/doctrine": {
- "version": "3.0.0",
- "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz",
- "integrity": "sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==",
- "dev": true,
- "dependencies": {
- "esutils": "^2.0.2"
- },
- "engines": {
- "node": ">=6.0.0"
- }
- },
- "node_modules/eastasianwidth": {
- "version": "0.2.0",
- "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz",
- "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA=="
- },
- "node_modules/ecc-jsbn": {
- "version": "0.1.2",
- "resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz",
- "integrity": "sha512-eh9O+hwRHNbG4BLTjEl3nw044CkGm5X6LoaCf7LPp7UU8Qrt47JYNi6nPX8xjW97TKGKm1ouctg0QSpZe9qrnw==",
- "dependencies": {
- "jsbn": "~0.1.0",
- "safer-buffer": "^2.1.0"
- }
- },
- "node_modules/emoji-regex": {
- "version": "9.2.2",
- "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz",
- "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg=="
- },
- "node_modules/error-ex": {
- "version": "1.3.2",
- "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz",
- "integrity": "sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==",
- "dev": true,
- "dependencies": {
- "is-arrayish": "^0.2.1"
- }
- },
- "node_modules/es-abstract": {
- "version": "1.23.3",
- "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.23.3.tgz",
- "integrity": "sha512-e+HfNH61Bj1X9/jLc5v1owaLYuHdeHHSQlkhCBiTK8rBvKaULl/beGMxwrMXjpYrv4pz22BlY570vVePA2ho4A==",
- "dev": true,
- "dependencies": {
- "array-buffer-byte-length": "^1.0.1",
- "arraybuffer.prototype.slice": "^1.0.3",
- "available-typed-arrays": "^1.0.7",
- "call-bind": "^1.0.7",
- "data-view-buffer": "^1.0.1",
- "data-view-byte-length": "^1.0.1",
- "data-view-byte-offset": "^1.0.0",
- "es-define-property": "^1.0.0",
- "es-errors": "^1.3.0",
- "es-object-atoms": "^1.0.0",
- "es-set-tostringtag": "^2.0.3",
- "es-to-primitive": "^1.2.1",
- "function.prototype.name": "^1.1.6",
- "get-intrinsic": "^1.2.4",
- "get-symbol-description": "^1.0.2",
- "globalthis": "^1.0.3",
- "gopd": "^1.0.1",
- "has-property-descriptors": "^1.0.2",
- "has-proto": "^1.0.3",
- "has-symbols": "^1.0.3",
- "hasown": "^2.0.2",
- "internal-slot": "^1.0.7",
- "is-array-buffer": "^3.0.4",
- "is-callable": "^1.2.7",
- "is-data-view": "^1.0.1",
- "is-negative-zero": "^2.0.3",
- "is-regex": "^1.1.4",
- "is-shared-array-buffer": "^1.0.3",
- "is-string": "^1.0.7",
- "is-typed-array": "^1.1.13",
- "is-weakref": "^1.0.2",
- "object-inspect": "^1.13.1",
- "object-keys": "^1.1.1",
- "object.assign": "^4.1.5",
- "regexp.prototype.flags": "^1.5.2",
- "safe-array-concat": "^1.1.2",
- "safe-regex-test": "^1.0.3",
- "string.prototype.trim": "^1.2.9",
- "string.prototype.trimend": "^1.0.8",
- "string.prototype.trimstart": "^1.0.8",
- "typed-array-buffer": "^1.0.2",
- "typed-array-byte-length": "^1.0.1",
- "typed-array-byte-offset": "^1.0.2",
- "typed-array-length": "^1.0.6",
- "unbox-primitive": "^1.0.2",
- "which-typed-array": "^1.1.15"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/es-define-property": {
- "version": "1.0.0",
- "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.0.tgz",
- "integrity": "sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ==",
- "dev": true,
- "dependencies": {
- "get-intrinsic": "^1.2.4"
- },
- "engines": {
- "node": ">= 0.4"
- }
- },
- "node_modules/es-errors": {
- "version": "1.3.0",
- "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
- "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
- "dev": true,
- "engines": {
- "node": ">= 0.4"
- }
- },
- "node_modules/es-iterator-helpers": {
- "version": "1.0.19",
- "resolved": "https://registry.npmjs.org/es-iterator-helpers/-/es-iterator-helpers-1.0.19.tgz",
- "integrity": "sha512-zoMwbCcH5hwUkKJkT8kDIBZSz9I6mVG//+lDCinLCGov4+r7NIy0ld8o03M0cJxl2spVf6ESYVS6/gpIfq1FFw==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "define-properties": "^1.2.1",
- "es-abstract": "^1.23.3",
- "es-errors": "^1.3.0",
- "es-set-tostringtag": "^2.0.3",
- "function-bind": "^1.1.2",
- "get-intrinsic": "^1.2.4",
- "globalthis": "^1.0.3",
- "has-property-descriptors": "^1.0.2",
- "has-proto": "^1.0.3",
- "has-symbols": "^1.0.3",
- "internal-slot": "^1.0.7",
- "iterator.prototype": "^1.1.2",
- "safe-array-concat": "^1.1.2"
- },
- "engines": {
- "node": ">= 0.4"
- }
- },
- "node_modules/es-object-atoms": {
- "version": "1.0.0",
- "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.0.0.tgz",
- "integrity": "sha512-MZ4iQ6JwHOBQjahnjwaC1ZtIBH+2ohjamzAO3oaHcXYup7qxjF2fixyH+Q71voWHeOkI2q/TnJao/KfXYIZWbw==",
- "dev": true,
- "dependencies": {
- "es-errors": "^1.3.0"
- },
- "engines": {
- "node": ">= 0.4"
- }
- },
- "node_modules/es-set-tostringtag": {
- "version": "2.0.3",
- "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.0.3.tgz",
- "integrity": "sha512-3T8uNMC3OQTHkFUsFq8r/BwAXLHvU/9O9mE0fBc/MY5iq/8H7ncvO947LmYA6ldWw9Uh8Yhf25zu6n7nML5QWQ==",
- "dev": true,
- "dependencies": {
- "get-intrinsic": "^1.2.4",
- "has-tostringtag": "^1.0.2",
- "hasown": "^2.0.1"
- },
- "engines": {
- "node": ">= 0.4"
- }
- },
- "node_modules/es-shim-unscopables": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/es-shim-unscopables/-/es-shim-unscopables-1.0.2.tgz",
- "integrity": "sha512-J3yBRXCzDu4ULnQwxyToo/OjdMx6akgVC7K6few0a7F/0wLtmKKN7I73AH5T2836UuXRqN7Qg+IIUw/+YJksRw==",
- "dev": true,
- "dependencies": {
- "hasown": "^2.0.0"
- }
- },
- "node_modules/es-to-primitive": {
- "version": "1.2.1",
- "resolved": "https://registry.npmjs.org/es-to-primitive/-/es-to-primitive-1.2.1.tgz",
- "integrity": "sha512-QCOllgZJtaUo9miYBcLChTUaHNjJF3PYs1VidD7AwiEj1kYxKeQTctLAezAOH5ZKRH0g2IgPn6KwB4IT8iRpvA==",
- "dev": true,
- "dependencies": {
- "is-callable": "^1.1.4",
- "is-date-object": "^1.0.1",
- "is-symbol": "^1.0.2"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/escape-string-regexp": {
- "version": "4.0.0",
- "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz",
- "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==",
- "dev": true,
- "engines": {
- "node": ">=10"
- },
- "funding": {
- "url": "https://github.com/sponsors/sindresorhus"
- }
- },
- "node_modules/eslint": {
- "version": "8.57.1",
- "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.57.1.tgz",
- "integrity": "sha512-ypowyDxpVSYpkXr9WPv2PAZCtNip1Mv5KTW0SCurXv/9iOpcrH9PaqUElksqEB6pChqHGDRCFTyrZlGhnLNGiA==",
- "dev": true,
- "dependencies": {
- "@eslint-community/eslint-utils": "^4.2.0",
- "@eslint-community/regexpp": "^4.6.1",
- "@eslint/eslintrc": "^2.1.4",
- "@eslint/js": "8.57.1",
- "@humanwhocodes/config-array": "^0.13.0",
- "@humanwhocodes/module-importer": "^1.0.1",
- "@nodelib/fs.walk": "^1.2.8",
- "@ungap/structured-clone": "^1.2.0",
- "ajv": "^6.12.4",
- "chalk": "^4.0.0",
- "cross-spawn": "^7.0.2",
- "debug": "^4.3.2",
- "doctrine": "^3.0.0",
- "escape-string-regexp": "^4.0.0",
- "eslint-scope": "^7.2.2",
- "eslint-visitor-keys": "^3.4.3",
- "espree": "^9.6.1",
- "esquery": "^1.4.2",
- "esutils": "^2.0.2",
- "fast-deep-equal": "^3.1.3",
- "file-entry-cache": "^6.0.1",
- "find-up": "^5.0.0",
- "glob-parent": "^6.0.2",
- "globals": "^13.19.0",
- "graphemer": "^1.4.0",
- "ignore": "^5.2.0",
- "imurmurhash": "^0.1.4",
- "is-glob": "^4.0.0",
- "is-path-inside": "^3.0.3",
- "js-yaml": "^4.1.0",
- "json-stable-stringify-without-jsonify": "^1.0.1",
- "levn": "^0.4.1",
- "lodash.merge": "^4.6.2",
- "minimatch": "^3.1.2",
- "natural-compare": "^1.4.0",
- "optionator": "^0.9.3",
- "strip-ansi": "^6.0.1",
- "text-table": "^0.2.0"
- },
- "bin": {
- "eslint": "bin/eslint.js"
- },
- "engines": {
- "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/eslint"
- }
- },
- "node_modules/eslint-config-standard": {
- "version": "17.1.0",
- "resolved": "https://registry.npmjs.org/eslint-config-standard/-/eslint-config-standard-17.1.0.tgz",
- "integrity": "sha512-IwHwmaBNtDK4zDHQukFDW5u/aTb8+meQWZvNFWkiGmbWjD6bqyuSSBxxXKkCftCUzc1zwCH2m/baCNDLGmuO5Q==",
- "dev": true,
- "funding": [
- {
- "type": "github",
- "url": "https://github.com/sponsors/feross"
- },
- {
- "type": "patreon",
- "url": "https://www.patreon.com/feross"
- },
- {
- "type": "consulting",
- "url": "https://feross.org/support"
- }
- ],
- "engines": {
- "node": ">=12.0.0"
- },
- "peerDependencies": {
- "eslint": "^8.0.1",
- "eslint-plugin-import": "^2.25.2",
- "eslint-plugin-n": "^15.0.0 || ^16.0.0 ",
- "eslint-plugin-promise": "^6.0.0"
- }
- },
- "node_modules/eslint-config-standard-jsx": {
- "version": "11.0.0",
- "resolved": "https://registry.npmjs.org/eslint-config-standard-jsx/-/eslint-config-standard-jsx-11.0.0.tgz",
- "integrity": "sha512-+1EV/R0JxEK1L0NGolAr8Iktm3Rgotx3BKwgaX+eAuSX8D952LULKtjgZD3F+e6SvibONnhLwoTi9DPxN5LvvQ==",
- "dev": true,
- "funding": [
- {
- "type": "github",
- "url": "https://github.com/sponsors/feross"
- },
- {
- "type": "patreon",
- "url": "https://www.patreon.com/feross"
- },
- {
- "type": "consulting",
- "url": "https://feross.org/support"
- }
- ],
- "peerDependencies": {
- "eslint": "^8.8.0",
- "eslint-plugin-react": "^7.28.0"
- }
- },
- "node_modules/eslint-import-resolver-node": {
- "version": "0.3.9",
- "resolved": "https://registry.npmjs.org/eslint-import-resolver-node/-/eslint-import-resolver-node-0.3.9.tgz",
- "integrity": "sha512-WFj2isz22JahUv+B788TlO3N6zL3nNJGU8CcZbPZvVEkBPaJdCV4vy5wyghty5ROFbCRnm132v8BScu5/1BQ8g==",
- "dev": true,
- "dependencies": {
- "debug": "^3.2.7",
- "is-core-module": "^2.13.0",
- "resolve": "^1.22.4"
- }
- },
- "node_modules/eslint-import-resolver-node/node_modules/debug": {
- "version": "3.2.7",
- "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz",
- "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==",
- "dev": true,
- "dependencies": {
- "ms": "^2.1.1"
- }
- },
- "node_modules/eslint-module-utils": {
- "version": "2.12.0",
- "resolved": "https://registry.npmjs.org/eslint-module-utils/-/eslint-module-utils-2.12.0.tgz",
- "integrity": "sha512-wALZ0HFoytlyh/1+4wuZ9FJCD/leWHQzzrxJ8+rebyReSLk7LApMyd3WJaLVoN+D5+WIdJyDK1c6JnE65V4Zyg==",
- "dev": true,
- "dependencies": {
- "debug": "^3.2.7"
- },
- "engines": {
- "node": ">=4"
- },
- "peerDependenciesMeta": {
- "eslint": {
- "optional": true
- }
- }
- },
- "node_modules/eslint-module-utils/node_modules/debug": {
- "version": "3.2.7",
- "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz",
- "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==",
- "dev": true,
- "dependencies": {
- "ms": "^2.1.1"
- }
- },
- "node_modules/eslint-plugin-es": {
- "version": "4.1.0",
- "resolved": "https://registry.npmjs.org/eslint-plugin-es/-/eslint-plugin-es-4.1.0.tgz",
- "integrity": "sha512-GILhQTnjYE2WorX5Jyi5i4dz5ALWxBIdQECVQavL6s7cI76IZTDWleTHkxz/QT3kvcs2QlGHvKLYsSlPOlPXnQ==",
- "dev": true,
- "dependencies": {
- "eslint-utils": "^2.0.0",
- "regexpp": "^3.0.0"
- },
- "engines": {
- "node": ">=8.10.0"
- },
- "funding": {
- "url": "https://github.com/sponsors/mysticatea"
- },
- "peerDependencies": {
- "eslint": ">=4.19.1"
- }
- },
- "node_modules/eslint-plugin-es/node_modules/eslint-utils": {
- "version": "2.1.0",
- "resolved": "https://registry.npmjs.org/eslint-utils/-/eslint-utils-2.1.0.tgz",
- "integrity": "sha512-w94dQYoauyvlDc43XnGB8lU3Zt713vNChgt4EWwhXAP2XkBvndfxF0AgIqKOOasjPIPzj9JqgwkwbCYD0/V3Zg==",
- "dev": true,
- "dependencies": {
- "eslint-visitor-keys": "^1.1.0"
- },
- "engines": {
- "node": ">=6"
- },
- "funding": {
- "url": "https://github.com/sponsors/mysticatea"
- }
- },
- "node_modules/eslint-plugin-es/node_modules/eslint-visitor-keys": {
- "version": "1.3.0",
- "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz",
- "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==",
- "dev": true,
- "engines": {
- "node": ">=4"
- }
- },
- "node_modules/eslint-plugin-import": {
- "version": "2.30.0",
- "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.30.0.tgz",
- "integrity": "sha512-/mHNE9jINJfiD2EKkg1BKyPyUk4zdnT54YgbOgfjSakWT5oyX/qQLVNTkehyfpcMxZXMy1zyonZ2v7hZTX43Yw==",
- "dev": true,
- "dependencies": {
- "@rtsao/scc": "^1.1.0",
- "array-includes": "^3.1.8",
- "array.prototype.findlastindex": "^1.2.5",
- "array.prototype.flat": "^1.3.2",
- "array.prototype.flatmap": "^1.3.2",
- "debug": "^3.2.7",
- "doctrine": "^2.1.0",
- "eslint-import-resolver-node": "^0.3.9",
- "eslint-module-utils": "^2.9.0",
- "hasown": "^2.0.2",
- "is-core-module": "^2.15.1",
- "is-glob": "^4.0.3",
- "minimatch": "^3.1.2",
- "object.fromentries": "^2.0.8",
- "object.groupby": "^1.0.3",
- "object.values": "^1.2.0",
- "semver": "^6.3.1",
- "tsconfig-paths": "^3.15.0"
- },
- "engines": {
- "node": ">=4"
- },
- "peerDependencies": {
- "eslint": "^2 || ^3 || ^4 || ^5 || ^6 || ^7.2.0 || ^8"
- }
- },
- "node_modules/eslint-plugin-import/node_modules/brace-expansion": {
- "version": "1.1.11",
- "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
- "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
- "dev": true,
- "dependencies": {
- "balanced-match": "^1.0.0",
- "concat-map": "0.0.1"
- }
- },
- "node_modules/eslint-plugin-import/node_modules/debug": {
- "version": "3.2.7",
- "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz",
- "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==",
- "dev": true,
- "dependencies": {
- "ms": "^2.1.1"
- }
- },
- "node_modules/eslint-plugin-import/node_modules/doctrine": {
- "version": "2.1.0",
- "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-2.1.0.tgz",
- "integrity": "sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==",
- "dev": true,
- "dependencies": {
- "esutils": "^2.0.2"
- },
- "engines": {
- "node": ">=0.10.0"
- }
- },
- "node_modules/eslint-plugin-import/node_modules/minimatch": {
- "version": "3.1.2",
- "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
- "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
- "dev": true,
- "dependencies": {
- "brace-expansion": "^1.1.7"
- },
- "engines": {
- "node": "*"
- }
- },
- "node_modules/eslint-plugin-n": {
- "version": "15.7.0",
- "resolved": "https://registry.npmjs.org/eslint-plugin-n/-/eslint-plugin-n-15.7.0.tgz",
- "integrity": "sha512-jDex9s7D/Qial8AGVIHq4W7NswpUD5DPDL2RH8Lzd9EloWUuvUkHfv4FRLMipH5q2UtyurorBkPeNi1wVWNh3Q==",
- "dev": true,
- "dependencies": {
- "builtins": "^5.0.1",
- "eslint-plugin-es": "^4.1.0",
- "eslint-utils": "^3.0.0",
- "ignore": "^5.1.1",
- "is-core-module": "^2.11.0",
- "minimatch": "^3.1.2",
- "resolve": "^1.22.1",
- "semver": "^7.3.8"
- },
- "engines": {
- "node": ">=12.22.0"
- },
- "funding": {
- "url": "https://github.com/sponsors/mysticatea"
- },
- "peerDependencies": {
- "eslint": ">=7.0.0"
- }
- },
- "node_modules/eslint-plugin-n/node_modules/brace-expansion": {
- "version": "1.1.11",
- "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
- "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
- "dev": true,
- "dependencies": {
- "balanced-match": "^1.0.0",
- "concat-map": "0.0.1"
- }
- },
- "node_modules/eslint-plugin-n/node_modules/minimatch": {
- "version": "3.1.2",
- "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
- "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
- "dev": true,
- "dependencies": {
- "brace-expansion": "^1.1.7"
- },
- "engines": {
- "node": "*"
- }
- },
- "node_modules/eslint-plugin-n/node_modules/semver": {
- "version": "7.6.3",
- "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz",
- "integrity": "sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==",
- "dev": true,
- "bin": {
- "semver": "bin/semver.js"
- },
- "engines": {
- "node": ">=10"
- }
- },
- "node_modules/eslint-plugin-promise": {
- "version": "6.6.0",
- "resolved": "https://registry.npmjs.org/eslint-plugin-promise/-/eslint-plugin-promise-6.6.0.tgz",
- "integrity": "sha512-57Zzfw8G6+Gq7axm2Pdo3gW/Rx3h9Yywgn61uE/3elTCOePEHVrn2i5CdfBwA1BLK0Q0WqctICIUSqXZW/VprQ==",
- "dev": true,
- "engines": {
- "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/eslint"
- },
- "peerDependencies": {
- "eslint": "^7.0.0 || ^8.0.0 || ^9.0.0"
- }
- },
- "node_modules/eslint-plugin-react": {
- "version": "7.37.0",
- "resolved": "https://registry.npmjs.org/eslint-plugin-react/-/eslint-plugin-react-7.37.0.tgz",
- "integrity": "sha512-IHBePmfWH5lKhJnJ7WB1V+v/GolbB0rjS8XYVCSQCZKaQCAUhMoVoOEn1Ef8Z8Wf0a7l8KTJvuZg5/e4qrZ6nA==",
- "dev": true,
- "dependencies": {
- "array-includes": "^3.1.8",
- "array.prototype.findlast": "^1.2.5",
- "array.prototype.flatmap": "^1.3.2",
- "array.prototype.tosorted": "^1.1.4",
- "doctrine": "^2.1.0",
- "es-iterator-helpers": "^1.0.19",
- "estraverse": "^5.3.0",
- "hasown": "^2.0.2",
- "jsx-ast-utils": "^2.4.1 || ^3.0.0",
- "minimatch": "^3.1.2",
- "object.entries": "^1.1.8",
- "object.fromentries": "^2.0.8",
- "object.values": "^1.2.0",
- "prop-types": "^15.8.1",
- "resolve": "^2.0.0-next.5",
- "semver": "^6.3.1",
- "string.prototype.matchall": "^4.0.11",
- "string.prototype.repeat": "^1.0.0"
- },
- "engines": {
- "node": ">=4"
- },
- "peerDependencies": {
- "eslint": "^3 || ^4 || ^5 || ^6 || ^7 || ^8 || ^9.7"
- }
- },
- "node_modules/eslint-plugin-react/node_modules/brace-expansion": {
- "version": "1.1.11",
- "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
- "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
- "dev": true,
- "dependencies": {
- "balanced-match": "^1.0.0",
- "concat-map": "0.0.1"
- }
- },
- "node_modules/eslint-plugin-react/node_modules/doctrine": {
- "version": "2.1.0",
- "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-2.1.0.tgz",
- "integrity": "sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==",
- "dev": true,
- "dependencies": {
- "esutils": "^2.0.2"
- },
- "engines": {
- "node": ">=0.10.0"
- }
- },
- "node_modules/eslint-plugin-react/node_modules/minimatch": {
- "version": "3.1.2",
- "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
- "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
- "dev": true,
- "dependencies": {
- "brace-expansion": "^1.1.7"
- },
- "engines": {
- "node": "*"
- }
- },
- "node_modules/eslint-plugin-react/node_modules/resolve": {
- "version": "2.0.0-next.5",
- "resolved": "https://registry.npmjs.org/resolve/-/resolve-2.0.0-next.5.tgz",
- "integrity": "sha512-U7WjGVG9sH8tvjW5SmGbQuui75FiyjAX72HX15DwBBwF9dNiQZRQAg9nnPhYy+TUnE0+VcrttuvNI8oSxZcocA==",
- "dev": true,
- "dependencies": {
- "is-core-module": "^2.13.0",
- "path-parse": "^1.0.7",
- "supports-preserve-symlinks-flag": "^1.0.0"
- },
- "bin": {
- "resolve": "bin/resolve"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/eslint-scope": {
- "version": "7.2.2",
- "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-7.2.2.tgz",
- "integrity": "sha512-dOt21O7lTMhDM+X9mB4GX+DZrZtCUJPL/wlcTqxyrx5IvO0IYtILdtrQGQp+8n5S0gwSVmOf9NQrjMOgfQZlIg==",
- "dev": true,
- "dependencies": {
- "esrecurse": "^4.3.0",
- "estraverse": "^5.2.0"
- },
- "engines": {
- "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/eslint"
- }
- },
- "node_modules/eslint-utils": {
- "version": "3.0.0",
- "resolved": "https://registry.npmjs.org/eslint-utils/-/eslint-utils-3.0.0.tgz",
- "integrity": "sha512-uuQC43IGctw68pJA1RgbQS8/NP7rch6Cwd4j3ZBtgo4/8Flj4eGE7ZYSZRN3iq5pVUv6GPdW5Z1RFleo84uLDA==",
- "dev": true,
- "dependencies": {
- "eslint-visitor-keys": "^2.0.0"
- },
- "engines": {
- "node": "^10.0.0 || ^12.0.0 || >= 14.0.0"
- },
- "funding": {
- "url": "https://github.com/sponsors/mysticatea"
- },
- "peerDependencies": {
- "eslint": ">=5"
- }
- },
- "node_modules/eslint-utils/node_modules/eslint-visitor-keys": {
- "version": "2.1.0",
- "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-2.1.0.tgz",
- "integrity": "sha512-0rSmRBzXgDzIsD6mGdJgevzgezI534Cer5L/vyMX0kHzT/jiB43jRhd9YUlMGYLQy2zprNmoT8qasCGtY+QaKw==",
- "dev": true,
- "engines": {
- "node": ">=10"
- }
- },
- "node_modules/eslint-visitor-keys": {
- "version": "3.4.3",
- "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz",
- "integrity": "sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag==",
- "dev": true,
- "engines": {
- "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/eslint"
- }
- },
- "node_modules/eslint/node_modules/ansi-regex": {
- "version": "5.0.1",
- "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
- "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
- "dev": true,
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/eslint/node_modules/brace-expansion": {
- "version": "1.1.11",
- "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
- "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
- "dev": true,
- "dependencies": {
- "balanced-match": "^1.0.0",
- "concat-map": "0.0.1"
- }
- },
- "node_modules/eslint/node_modules/minimatch": {
- "version": "3.1.2",
- "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
- "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
- "dev": true,
- "dependencies": {
- "brace-expansion": "^1.1.7"
- },
- "engines": {
- "node": "*"
- }
- },
- "node_modules/eslint/node_modules/strip-ansi": {
- "version": "6.0.1",
- "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
- "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
- "dev": true,
- "dependencies": {
- "ansi-regex": "^5.0.1"
- },
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/espree": {
- "version": "9.6.1",
- "resolved": "https://registry.npmjs.org/espree/-/espree-9.6.1.tgz",
- "integrity": "sha512-oruZaFkjorTpF32kDSI5/75ViwGeZginGGy2NoOSg3Q9bnwlnmDm4HLnkl0RE3n+njDXR037aY1+x58Z/zFdwQ==",
- "dev": true,
- "dependencies": {
- "acorn": "^8.9.0",
- "acorn-jsx": "^5.3.2",
- "eslint-visitor-keys": "^3.4.1"
- },
- "engines": {
- "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/eslint"
- }
- },
- "node_modules/esquery": {
- "version": "1.6.0",
- "resolved": "https://registry.npmjs.org/esquery/-/esquery-1.6.0.tgz",
- "integrity": "sha512-ca9pw9fomFcKPvFLXhBKUK90ZvGibiGOvRJNbjljY7s7uq/5YO4BOzcYtJqExdx99rF6aAcnRxHmcUHcz6sQsg==",
- "dev": true,
- "dependencies": {
- "estraverse": "^5.1.0"
- },
- "engines": {
- "node": ">=0.10"
- }
- },
- "node_modules/esrecurse": {
- "version": "4.3.0",
- "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz",
- "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==",
- "dev": true,
- "dependencies": {
- "estraverse": "^5.2.0"
- },
- "engines": {
- "node": ">=4.0"
- }
- },
- "node_modules/estraverse": {
- "version": "5.3.0",
- "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz",
- "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==",
- "dev": true,
- "engines": {
- "node": ">=4.0"
- }
- },
- "node_modules/esutils": {
- "version": "2.0.3",
- "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz",
- "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==",
- "dev": true,
- "engines": {
- "node": ">=0.10.0"
- }
- },
- "node_modules/extend": {
- "version": "3.0.2",
- "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz",
- "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g=="
- },
- "node_modules/extsprintf": {
- "version": "1.3.0",
- "resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.3.0.tgz",
- "integrity": "sha512-11Ndz7Nv+mvAC1j0ktTa7fAb0vLyGGX+rMHNBYQviQDGU0Hw7lhctJANqbPhu9nV9/izT/IntTgZ7Im/9LJs9g==",
- "engines": [
- "node >=0.6.0"
- ]
- },
- "node_modules/fast-deep-equal": {
- "version": "3.1.3",
- "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz",
- "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="
- },
- "node_modules/fast-json-stable-stringify": {
- "version": "2.1.0",
- "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz",
- "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw=="
- },
- "node_modules/fast-levenshtein": {
- "version": "2.0.6",
- "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz",
- "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==",
- "dev": true
- },
- "node_modules/fastq": {
- "version": "1.17.1",
- "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.17.1.tgz",
- "integrity": "sha512-sRVD3lWVIXWg6By68ZN7vho9a1pQcN/WBFaAAsDDFzlJjvoGx0P8z7V1t72grFJfJhu3YPZBuu25f7Kaw2jN1w==",
- "dev": true,
- "dependencies": {
- "reusify": "^1.0.4"
- }
- },
- "node_modules/file-entry-cache": {
- "version": "6.0.1",
- "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz",
- "integrity": "sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==",
- "dev": true,
- "dependencies": {
- "flat-cache": "^3.0.4"
- },
- "engines": {
- "node": "^10.12.0 || >=12.0.0"
- }
- },
- "node_modules/find-up": {
- "version": "5.0.0",
- "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz",
- "integrity": "sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng==",
- "dev": true,
- "dependencies": {
- "locate-path": "^6.0.0",
- "path-exists": "^4.0.0"
- },
- "engines": {
- "node": ">=10"
- },
- "funding": {
- "url": "https://github.com/sponsors/sindresorhus"
- }
- },
- "node_modules/flat-cache": {
- "version": "3.2.0",
- "resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-3.2.0.tgz",
- "integrity": "sha512-CYcENa+FtcUKLmhhqyctpclsq7QF38pKjZHsGNiSQF5r4FtoKDWabFDl3hzaEQMvT1LHEysw5twgLvpYYb4vbw==",
- "dev": true,
- "dependencies": {
- "flatted": "^3.2.9",
- "keyv": "^4.5.3",
- "rimraf": "^3.0.2"
- },
- "engines": {
- "node": "^10.12.0 || >=12.0.0"
- }
- },
- "node_modules/flat-cache/node_modules/brace-expansion": {
- "version": "1.1.11",
- "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
- "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
- "dev": true,
- "dependencies": {
- "balanced-match": "^1.0.0",
- "concat-map": "0.0.1"
- }
- },
- "node_modules/flat-cache/node_modules/glob": {
- "version": "7.2.3",
- "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz",
- "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==",
- "deprecated": "Glob versions prior to v9 are no longer supported",
- "dev": true,
- "dependencies": {
- "fs.realpath": "^1.0.0",
- "inflight": "^1.0.4",
- "inherits": "2",
- "minimatch": "^3.1.1",
- "once": "^1.3.0",
- "path-is-absolute": "^1.0.0"
- },
- "engines": {
- "node": "*"
- },
- "funding": {
- "url": "https://github.com/sponsors/isaacs"
- }
- },
- "node_modules/flat-cache/node_modules/minimatch": {
- "version": "3.1.2",
- "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
- "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
- "dev": true,
- "dependencies": {
- "brace-expansion": "^1.1.7"
- },
- "engines": {
- "node": "*"
- }
- },
- "node_modules/flat-cache/node_modules/rimraf": {
- "version": "3.0.2",
- "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
- "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==",
- "deprecated": "Rimraf versions prior to v4 are no longer supported",
- "dev": true,
- "dependencies": {
- "glob": "^7.1.3"
- },
- "bin": {
- "rimraf": "bin.js"
- },
- "funding": {
- "url": "https://github.com/sponsors/isaacs"
- }
- },
- "node_modules/flatted": {
- "version": "3.3.1",
- "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.1.tgz",
- "integrity": "sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw==",
- "dev": true
- },
- "node_modules/for-each": {
- "version": "0.3.3",
- "resolved": "https://registry.npmjs.org/for-each/-/for-each-0.3.3.tgz",
- "integrity": "sha512-jqYfLp7mo9vIyQf8ykW2v7A+2N4QjeCeI5+Dz9XraiO1ign81wjiH7Fb9vSOWvQfNtmSa4H2RoQTrrXivdUZmw==",
- "dev": true,
- "dependencies": {
- "is-callable": "^1.1.3"
- }
- },
- "node_modules/foreground-child": {
- "version": "3.3.0",
- "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.0.tgz",
- "integrity": "sha512-Ld2g8rrAyMYFXBhEqMz8ZAHBi4J4uS1i/CxGMDnjyFWddMXLVcDp051DZfu+t7+ab7Wv6SMqpWmyFIj5UbfFvg==",
- "dependencies": {
- "cross-spawn": "^7.0.0",
- "signal-exit": "^4.0.1"
- },
- "engines": {
- "node": ">=14"
- },
- "funding": {
- "url": "https://github.com/sponsors/isaacs"
- }
- },
- "node_modules/forever-agent": {
- "version": "0.6.1",
- "resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz",
- "integrity": "sha512-j0KLYPhm6zeac4lz3oJ3o65qvgQCcPubiyotZrXqEaG4hNagNYO8qdlUrX5vwqv9ohqeT/Z3j6+yW067yWWdUw==",
- "engines": {
- "node": "*"
- }
- },
- "node_modules/form-data": {
- "version": "2.5.1",
- "resolved": "https://registry.npmjs.org/form-data/-/form-data-2.5.1.tgz",
- "integrity": "sha512-m21N3WOmEEURgk6B9GLOE4RuWOFf28Lhh9qGYeNlGq4VDXUlJy2th2slBNU8Gp8EzloYZOibZJ7t5ecIrFSjVA==",
- "dependencies": {
- "asynckit": "^0.4.0",
- "combined-stream": "^1.0.6",
- "mime-types": "^2.1.12"
- },
- "engines": {
- "node": ">= 0.12"
- }
- },
- "node_modules/fs.realpath": {
- "version": "1.0.0",
- "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
- "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==",
- "dev": true
- },
- "node_modules/function-bind": {
- "version": "1.1.2",
- "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
- "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
- "dev": true,
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/function.prototype.name": {
- "version": "1.1.6",
- "resolved": "https://registry.npmjs.org/function.prototype.name/-/function.prototype.name-1.1.6.tgz",
- "integrity": "sha512-Z5kx79swU5P27WEayXM1tBi5Ze/lbIyiNgU3qyXUOf9b2rgXYyF9Dy9Cx+IQv/Lc8WCG6L82zwUPpSS9hGehIg==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.2",
- "define-properties": "^1.2.0",
- "es-abstract": "^1.22.1",
- "functions-have-names": "^1.2.3"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/functions-have-names": {
- "version": "1.2.3",
- "resolved": "https://registry.npmjs.org/functions-have-names/-/functions-have-names-1.2.3.tgz",
- "integrity": "sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ==",
- "dev": true,
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/get-intrinsic": {
- "version": "1.2.4",
- "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.4.tgz",
- "integrity": "sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ==",
- "dev": true,
- "dependencies": {
- "es-errors": "^1.3.0",
- "function-bind": "^1.1.2",
- "has-proto": "^1.0.1",
- "has-symbols": "^1.0.3",
- "hasown": "^2.0.0"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/get-stdin": {
- "version": "8.0.0",
- "resolved": "https://registry.npmjs.org/get-stdin/-/get-stdin-8.0.0.tgz",
- "integrity": "sha512-sY22aA6xchAzprjyqmSEQv4UbAAzRN0L2dQB0NlN5acTTK9Don6nhoc3eAbUnpZiCANAMfd/+40kVdKfFygohg==",
- "dev": true,
- "engines": {
- "node": ">=10"
- },
- "funding": {
- "url": "https://github.com/sponsors/sindresorhus"
- }
- },
- "node_modules/get-symbol-description": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/get-symbol-description/-/get-symbol-description-1.0.2.tgz",
- "integrity": "sha512-g0QYk1dZBxGwk+Ngc+ltRH2IBp2f7zBkBMBJZCDerh6EhlhSR6+9irMCuT/09zD6qkarHUSn529sK/yL4S27mg==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.5",
- "es-errors": "^1.3.0",
- "get-intrinsic": "^1.2.4"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/getpass": {
- "version": "0.1.7",
- "resolved": "https://registry.npmjs.org/getpass/-/getpass-0.1.7.tgz",
- "integrity": "sha512-0fzj9JxOLfJ+XGLhR8ze3unN0KZCgZwiSSDz168VERjK8Wl8kVSdcu2kspd4s4wtAa1y/qrVRiAA0WclVsu0ng==",
- "dependencies": {
- "assert-plus": "^1.0.0"
- }
- },
- "node_modules/glob": {
- "version": "10.4.5",
- "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz",
- "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==",
- "dependencies": {
- "foreground-child": "^3.1.0",
- "jackspeak": "^3.1.2",
- "minimatch": "^9.0.4",
- "minipass": "^7.1.2",
- "package-json-from-dist": "^1.0.0",
- "path-scurry": "^1.11.1"
- },
- "bin": {
- "glob": "dist/esm/bin.mjs"
- },
- "funding": {
- "url": "https://github.com/sponsors/isaacs"
- }
- },
- "node_modules/glob-parent": {
- "version": "6.0.2",
- "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz",
- "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==",
- "dev": true,
- "dependencies": {
- "is-glob": "^4.0.3"
- },
- "engines": {
- "node": ">=10.13.0"
- }
- },
- "node_modules/globals": {
- "version": "13.24.0",
- "resolved": "https://registry.npmjs.org/globals/-/globals-13.24.0.tgz",
- "integrity": "sha512-AhO5QUcj8llrbG09iWhPU2B204J1xnPeL8kQmVorSsy+Sjj1sk8gIyh6cUocGmH4L0UuhAJy+hJMRA4mgA4mFQ==",
- "dev": true,
- "dependencies": {
- "type-fest": "^0.20.2"
- },
- "engines": {
- "node": ">=8"
- },
- "funding": {
- "url": "https://github.com/sponsors/sindresorhus"
- }
- },
- "node_modules/globalthis": {
- "version": "1.0.4",
- "resolved": "https://registry.npmjs.org/globalthis/-/globalthis-1.0.4.tgz",
- "integrity": "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ==",
- "dev": true,
- "dependencies": {
- "define-properties": "^1.2.1",
- "gopd": "^1.0.1"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/gopd": {
- "version": "1.0.1",
- "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.0.1.tgz",
- "integrity": "sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA==",
- "dev": true,
- "dependencies": {
- "get-intrinsic": "^1.1.3"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/graceful-fs": {
- "version": "4.2.11",
- "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
- "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==",
- "dev": true
- },
- "node_modules/graphemer": {
- "version": "1.4.0",
- "resolved": "https://registry.npmjs.org/graphemer/-/graphemer-1.4.0.tgz",
- "integrity": "sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==",
- "dev": true
- },
- "node_modules/har-schema": {
- "version": "2.0.0",
- "resolved": "https://registry.npmjs.org/har-schema/-/har-schema-2.0.0.tgz",
- "integrity": "sha512-Oqluz6zhGX8cyRaTQlFMPw80bSJVG2x/cFb8ZPhUILGgHka9SsokCCOQgpveePerqidZOrT14ipqfJb7ILcW5Q==",
- "engines": {
- "node": ">=4"
- }
- },
- "node_modules/har-validator": {
- "version": "5.1.5",
- "resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.1.5.tgz",
- "integrity": "sha512-nmT2T0lljbxdQZfspsno9hgrG3Uir6Ks5afism62poxqBM6sDnMEuPmzTq8XN0OEwqKLLdh1jQI3qyE66Nzb3w==",
- "deprecated": "this library is no longer supported",
- "dependencies": {
- "ajv": "^6.12.3",
- "har-schema": "^2.0.0"
- },
- "engines": {
- "node": ">=6"
- }
- },
- "node_modules/has-bigints": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.0.2.tgz",
- "integrity": "sha512-tSvCKtBr9lkF0Ex0aQiP9N+OpV4zi2r/Nee5VkRDbaqv35RLYMzbwQfFSZZH0kR+Rd6302UJZ2p/bJCEoR3VoQ==",
- "dev": true,
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/has-flag": {
- "version": "4.0.0",
- "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz",
- "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==",
- "dev": true,
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/has-property-descriptors": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz",
- "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==",
- "dev": true,
- "dependencies": {
- "es-define-property": "^1.0.0"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/has-proto": {
- "version": "1.0.3",
- "resolved": "https://registry.npmjs.org/has-proto/-/has-proto-1.0.3.tgz",
- "integrity": "sha512-SJ1amZAJUiZS+PhsVLf5tGydlaVB8EdFpaSO4gmiUKUOxk8qzn5AIy4ZeJUmh22znIdk/uMAUT2pl3FxzVUH+Q==",
- "dev": true,
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/has-symbols": {
- "version": "1.0.3",
- "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.3.tgz",
- "integrity": "sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A==",
- "dev": true,
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/has-tostringtag": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz",
- "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==",
- "dev": true,
- "dependencies": {
- "has-symbols": "^1.0.3"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/hasown": {
- "version": "2.0.2",
- "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
- "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
- "dev": true,
- "dependencies": {
- "function-bind": "^1.1.2"
- },
- "engines": {
- "node": ">= 0.4"
- }
- },
- "node_modules/http-signature": {
- "version": "1.2.0",
- "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.2.0.tgz",
- "integrity": "sha512-CAbnr6Rz4CYQkLYUtSNXxQPUH2gK8f3iWexVlsnMeD+GjlsQ0Xsy1cOX+mN3dtxYomRy21CiOzU8Uhw6OwncEQ==",
- "dependencies": {
- "assert-plus": "^1.0.0",
- "jsprim": "^1.2.2",
- "sshpk": "^1.7.0"
- },
- "engines": {
- "node": ">=0.8",
- "npm": ">=1.3.7"
- }
- },
- "node_modules/ignore": {
- "version": "5.3.2",
- "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz",
- "integrity": "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g==",
- "dev": true,
- "engines": {
- "node": ">= 4"
- }
- },
- "node_modules/import-fresh": {
- "version": "3.3.0",
- "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.0.tgz",
- "integrity": "sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw==",
- "dev": true,
- "dependencies": {
- "parent-module": "^1.0.0",
- "resolve-from": "^4.0.0"
- },
- "engines": {
- "node": ">=6"
- },
- "funding": {
- "url": "https://github.com/sponsors/sindresorhus"
- }
- },
- "node_modules/imurmurhash": {
- "version": "0.1.4",
- "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz",
- "integrity": "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==",
- "dev": true,
- "engines": {
- "node": ">=0.8.19"
- }
- },
- "node_modules/inflight": {
- "version": "1.0.6",
- "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz",
- "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==",
- "deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.",
- "dev": true,
- "dependencies": {
- "once": "^1.3.0",
- "wrappy": "1"
- }
- },
- "node_modules/inherits": {
- "version": "2.0.4",
- "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
- "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
- "dev": true
- },
- "node_modules/internal-slot": {
- "version": "1.0.7",
- "resolved": "https://registry.npmjs.org/internal-slot/-/internal-slot-1.0.7.tgz",
- "integrity": "sha512-NGnrKwXzSms2qUUih/ILZ5JBqNTSa1+ZmP6flaIp6KmSElgE9qdndzS3cqjrDovwFdmwsGsLdeFgB6suw+1e9g==",
- "dev": true,
- "dependencies": {
- "es-errors": "^1.3.0",
- "hasown": "^2.0.0",
- "side-channel": "^1.0.4"
- },
- "engines": {
- "node": ">= 0.4"
- }
- },
- "node_modules/is-array-buffer": {
- "version": "3.0.4",
- "resolved": "https://registry.npmjs.org/is-array-buffer/-/is-array-buffer-3.0.4.tgz",
- "integrity": "sha512-wcjaerHw0ydZwfhiKbXJWLDY8A7yV7KhjQOpb83hGgGfId/aQa4TOvwyzn2PuswW2gPCYEL/nEAiSVpdOj1lXw==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.2",
- "get-intrinsic": "^1.2.1"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-arrayish": {
- "version": "0.2.1",
- "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz",
- "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==",
- "dev": true
- },
- "node_modules/is-async-function": {
- "version": "2.0.0",
- "resolved": "https://registry.npmjs.org/is-async-function/-/is-async-function-2.0.0.tgz",
- "integrity": "sha512-Y1JXKrfykRJGdlDwdKlLpLyMIiWqWvuSd17TvZk68PLAOGOoF4Xyav1z0Xhoi+gCYjZVeC5SI+hYFOfvXmGRCA==",
- "dev": true,
- "dependencies": {
- "has-tostringtag": "^1.0.0"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-bigint": {
- "version": "1.0.4",
- "resolved": "https://registry.npmjs.org/is-bigint/-/is-bigint-1.0.4.tgz",
- "integrity": "sha512-zB9CruMamjym81i2JZ3UMn54PKGsQzsJeo6xvN3HJJ4CAsQNB6iRutp2To77OfCNuoxspsIhzaPoO1zyCEhFOg==",
- "dev": true,
- "dependencies": {
- "has-bigints": "^1.0.1"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-boolean-object": {
- "version": "1.1.2",
- "resolved": "https://registry.npmjs.org/is-boolean-object/-/is-boolean-object-1.1.2.tgz",
- "integrity": "sha512-gDYaKHJmnj4aWxyj6YHyXVpdQawtVLHU5cb+eztPGczf6cjuTdwve5ZIEfgXqH4e57An1D1AKf8CZ3kYrQRqYA==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.2",
- "has-tostringtag": "^1.0.0"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-callable": {
- "version": "1.2.7",
- "resolved": "https://registry.npmjs.org/is-callable/-/is-callable-1.2.7.tgz",
- "integrity": "sha512-1BC0BVFhS/p0qtw6enp8e+8OD0UrK0oFLztSjNzhcKA3WDuJxxAPXzPuPtKkjEY9UUoEWlX/8fgKeu2S8i9JTA==",
- "dev": true,
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-core-module": {
- "version": "2.15.1",
- "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.15.1.tgz",
- "integrity": "sha512-z0vtXSwucUJtANQWldhbtbt7BnL0vxiFjIdDLAatwhDYty2bad6s+rijD6Ri4YuYJubLzIJLUidCh09e1djEVQ==",
- "dev": true,
- "dependencies": {
- "hasown": "^2.0.2"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-data-view": {
- "version": "1.0.1",
- "resolved": "https://registry.npmjs.org/is-data-view/-/is-data-view-1.0.1.tgz",
- "integrity": "sha512-AHkaJrsUVW6wq6JS8y3JnM/GJF/9cf+k20+iDzlSaJrinEo5+7vRiteOSwBhHRiAyQATN1AmY4hwzxJKPmYf+w==",
- "dev": true,
- "dependencies": {
- "is-typed-array": "^1.1.13"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-date-object": {
- "version": "1.0.5",
- "resolved": "https://registry.npmjs.org/is-date-object/-/is-date-object-1.0.5.tgz",
- "integrity": "sha512-9YQaSxsAiSwcvS33MBk3wTCVnWK+HhF8VZR2jRxehM16QcVOdHqPn4VPHmRK4lSr38n9JriurInLcP90xsYNfQ==",
- "dev": true,
- "dependencies": {
- "has-tostringtag": "^1.0.0"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-extglob": {
- "version": "2.1.1",
- "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz",
- "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==",
- "dev": true,
- "engines": {
- "node": ">=0.10.0"
- }
- },
- "node_modules/is-finalizationregistry": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/is-finalizationregistry/-/is-finalizationregistry-1.0.2.tgz",
- "integrity": "sha512-0by5vtUJs8iFQb5TYUHHPudOR+qXYIMKtiUzvLIZITZUjknFmziyBJuLhVRc+Ds0dREFlskDNJKYIdIzu/9pfw==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.2"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-fullwidth-code-point": {
- "version": "3.0.0",
- "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
- "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/is-generator-function": {
- "version": "1.0.10",
- "resolved": "https://registry.npmjs.org/is-generator-function/-/is-generator-function-1.0.10.tgz",
- "integrity": "sha512-jsEjy9l3yiXEQ+PsXdmBwEPcOxaXWLspKdplFUVI9vq1iZgIekeC0L167qeu86czQaxed3q/Uzuw0swL0irL8A==",
- "dev": true,
- "dependencies": {
- "has-tostringtag": "^1.0.0"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-glob": {
- "version": "4.0.3",
- "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz",
- "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==",
- "dev": true,
- "dependencies": {
- "is-extglob": "^2.1.1"
- },
- "engines": {
- "node": ">=0.10.0"
- }
- },
- "node_modules/is-map": {
- "version": "2.0.3",
- "resolved": "https://registry.npmjs.org/is-map/-/is-map-2.0.3.tgz",
- "integrity": "sha512-1Qed0/Hr2m+YqxnM09CjA2d/i6YZNfF6R2oRAOj36eUdS6qIV/huPJNSEpKbupewFs+ZsJlxsjjPbc0/afW6Lw==",
- "dev": true,
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-negative-zero": {
- "version": "2.0.3",
- "resolved": "https://registry.npmjs.org/is-negative-zero/-/is-negative-zero-2.0.3.tgz",
- "integrity": "sha512-5KoIu2Ngpyek75jXodFvnafB6DJgr3u8uuK0LEZJjrU19DrMD3EVERaR8sjz8CCGgpZvxPl9SuE1GMVPFHx1mw==",
- "dev": true,
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-number-object": {
- "version": "1.0.7",
- "resolved": "https://registry.npmjs.org/is-number-object/-/is-number-object-1.0.7.tgz",
- "integrity": "sha512-k1U0IRzLMo7ZlYIfzRu23Oh6MiIFasgpb9X76eqfFZAqwH44UI4KTBvBYIZ1dSL9ZzChTB9ShHfLkR4pdW5krQ==",
- "dev": true,
- "dependencies": {
- "has-tostringtag": "^1.0.0"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-path-inside": {
- "version": "3.0.3",
- "resolved": "https://registry.npmjs.org/is-path-inside/-/is-path-inside-3.0.3.tgz",
- "integrity": "sha512-Fd4gABb+ycGAmKou8eMftCupSir5lRxqf4aD/vd0cD2qc4HL07OjCeuHMr8Ro4CoMaeCKDB0/ECBOVWjTwUvPQ==",
- "dev": true,
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/is-regex": {
- "version": "1.1.4",
- "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.1.4.tgz",
- "integrity": "sha512-kvRdxDsxZjhzUX07ZnLydzS1TU/TJlTUHHY4YLL87e37oUA49DfkLqgy+VjFocowy29cKvcSiu+kIv728jTTVg==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.2",
- "has-tostringtag": "^1.0.0"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-set": {
- "version": "2.0.3",
- "resolved": "https://registry.npmjs.org/is-set/-/is-set-2.0.3.tgz",
- "integrity": "sha512-iPAjerrse27/ygGLxw+EBR9agv9Y6uLeYVJMu+QNCoouJ1/1ri0mGrcWpfCqFZuzzx3WjtwxG098X+n4OuRkPg==",
- "dev": true,
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-shared-array-buffer": {
- "version": "1.0.3",
- "resolved": "https://registry.npmjs.org/is-shared-array-buffer/-/is-shared-array-buffer-1.0.3.tgz",
- "integrity": "sha512-nA2hv5XIhLR3uVzDDfCIknerhx8XUKnstuOERPNNIinXG7v9u+ohXF67vxm4TPTEPU6lm61ZkwP3c9PCB97rhg==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-string": {
- "version": "1.0.7",
- "resolved": "https://registry.npmjs.org/is-string/-/is-string-1.0.7.tgz",
- "integrity": "sha512-tE2UXzivje6ofPW7l23cjDOMa09gb7xlAqG6jG5ej6uPV32TlWP3NKPigtaGeHNu9fohccRYvIiZMfOOnOYUtg==",
- "dev": true,
- "dependencies": {
- "has-tostringtag": "^1.0.0"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-symbol": {
- "version": "1.0.4",
- "resolved": "https://registry.npmjs.org/is-symbol/-/is-symbol-1.0.4.tgz",
- "integrity": "sha512-C/CPBqKWnvdcxqIARxyOh4v1UUEOCHpgDa0WYgpKDFMszcrPcffg5uhwSgPCLD2WWxmq6isisz87tzT01tuGhg==",
- "dev": true,
- "dependencies": {
- "has-symbols": "^1.0.2"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-typed-array": {
- "version": "1.1.13",
- "resolved": "https://registry.npmjs.org/is-typed-array/-/is-typed-array-1.1.13.tgz",
- "integrity": "sha512-uZ25/bUAlUY5fR4OKT4rZQEBrzQWYV9ZJYGGsUmEJ6thodVJ1HX64ePQ6Z0qPWP+m+Uq6e9UugrE38jeYsDSMw==",
- "dev": true,
- "dependencies": {
- "which-typed-array": "^1.1.14"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-typedarray": {
- "version": "1.0.0",
- "resolved": "https://registry.npmjs.org/is-typedarray/-/is-typedarray-1.0.0.tgz",
- "integrity": "sha512-cyA56iCMHAh5CdzjJIa4aohJyeO1YbwLi3Jc35MmRU6poroFjIGZzUzupGiRPOjgHg9TLu43xbpwXk523fMxKA=="
- },
- "node_modules/is-weakmap": {
- "version": "2.0.2",
- "resolved": "https://registry.npmjs.org/is-weakmap/-/is-weakmap-2.0.2.tgz",
- "integrity": "sha512-K5pXYOm9wqY1RgjpL3YTkF39tni1XajUIkawTLUo9EZEVUFga5gSQJF8nNS7ZwJQ02y+1YCNYcMh+HIf1ZqE+w==",
- "dev": true,
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-weakref": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/is-weakref/-/is-weakref-1.0.2.tgz",
- "integrity": "sha512-qctsuLZmIQ0+vSSMfoVvyFe2+GSEvnmZ2ezTup1SBse9+twCCeial6EEi3Nc2KFcf6+qz2FBPnjXsk8xhKSaPQ==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.2"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/is-weakset": {
- "version": "2.0.3",
- "resolved": "https://registry.npmjs.org/is-weakset/-/is-weakset-2.0.3.tgz",
- "integrity": "sha512-LvIm3/KWzS9oRFHugab7d+M/GcBXuXX5xZkzPmN+NxihdQlZUQ4dWuSV1xR/sq6upL1TJEDrfBgRepHFdBtSNQ==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "get-intrinsic": "^1.2.4"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/isarray": {
- "version": "2.0.5",
- "resolved": "https://registry.npmjs.org/isarray/-/isarray-2.0.5.tgz",
- "integrity": "sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw==",
- "dev": true
- },
- "node_modules/isexe": {
- "version": "2.0.0",
- "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
- "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw=="
- },
- "node_modules/isomorphic-ws": {
- "version": "5.0.0",
- "resolved": "https://registry.npmjs.org/isomorphic-ws/-/isomorphic-ws-5.0.0.tgz",
- "integrity": "sha512-muId7Zzn9ywDsyXgTIafTry2sV3nySZeUDe6YedVd1Hvuuep5AsIlqK+XefWpYTyJG5e503F2xIuT2lcU6rCSw==",
- "peerDependencies": {
- "ws": "*"
- }
- },
- "node_modules/isstream": {
- "version": "0.1.2",
- "resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz",
- "integrity": "sha512-Yljz7ffyPbrLpLngrMtZ7NduUgVvi6wG9RJ9IUcyCd59YQ911PBJphODUcbOVbqYfxe1wuYf/LJ8PauMRwsM/g=="
- },
- "node_modules/iterator.prototype": {
- "version": "1.1.2",
- "resolved": "https://registry.npmjs.org/iterator.prototype/-/iterator.prototype-1.1.2.tgz",
- "integrity": "sha512-DR33HMMr8EzwuRL8Y9D3u2BMj8+RqSE850jfGu59kS7tbmPLzGkZmVSfyCFSDxuZiEY6Rzt3T2NA/qU+NwVj1w==",
- "dev": true,
- "dependencies": {
- "define-properties": "^1.2.1",
- "get-intrinsic": "^1.2.1",
- "has-symbols": "^1.0.3",
- "reflect.getprototypeof": "^1.0.4",
- "set-function-name": "^2.0.1"
- }
- },
- "node_modules/jackspeak": {
- "version": "3.4.3",
- "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz",
- "integrity": "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==",
- "dependencies": {
- "@isaacs/cliui": "^8.0.2"
- },
- "funding": {
- "url": "https://github.com/sponsors/isaacs"
- },
- "optionalDependencies": {
- "@pkgjs/parseargs": "^0.11.0"
- }
- },
- "node_modules/jose": {
- "version": "4.15.9",
- "resolved": "https://registry.npmjs.org/jose/-/jose-4.15.9.tgz",
- "integrity": "sha512-1vUQX+IdDMVPj4k8kOxgUqlcK518yluMuGZwqlr44FS1ppZB/5GWh4rZG89erpOBOJjU/OBsnCVFfapsRz6nEA==",
- "optional": true,
- "funding": {
- "url": "https://github.com/sponsors/panva"
- }
- },
- "node_modules/js-tokens": {
- "version": "4.0.0",
- "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
- "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==",
- "dev": true
- },
- "node_modules/js-yaml": {
- "version": "4.1.0",
- "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz",
- "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==",
- "dependencies": {
- "argparse": "^2.0.1"
- },
- "bin": {
- "js-yaml": "bin/js-yaml.js"
- }
- },
- "node_modules/jsbn": {
- "version": "0.1.1",
- "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz",
- "integrity": "sha512-UVU9dibq2JcFWxQPA6KCqj5O42VOmAY3zQUfEKxU0KpTGXwNoCjkX1e13eHNvw/xPynt6pU0rZ1htjWTNTSXsg=="
- },
- "node_modules/json-buffer": {
- "version": "3.0.1",
- "resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz",
- "integrity": "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==",
- "dev": true
- },
- "node_modules/json-parse-better-errors": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/json-parse-better-errors/-/json-parse-better-errors-1.0.2.tgz",
- "integrity": "sha512-mrqyZKfX5EhL7hvqcV6WG1yYjnjeuYDzDhhcAAUrq8Po85NBQBJP+ZDUT75qZQ98IkUoBqdkExkukOU7Ts2wrw==",
- "dev": true
- },
- "node_modules/json-schema": {
- "version": "0.4.0",
- "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.4.0.tgz",
- "integrity": "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA=="
- },
- "node_modules/json-schema-traverse": {
- "version": "0.4.1",
- "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz",
- "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg=="
- },
- "node_modules/json-stable-stringify-without-jsonify": {
- "version": "1.0.1",
- "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz",
- "integrity": "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==",
- "dev": true
- },
- "node_modules/json-stringify-safe": {
- "version": "5.0.1",
- "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz",
- "integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA=="
- },
- "node_modules/json5": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.2.tgz",
- "integrity": "sha512-g1MWMLBiz8FKi1e4w0UyVL3w+iJceWAFBAaBnnGKOpNa5f8TLktkbre1+s6oICydWAm+HRUGTmI+//xv2hvXYA==",
- "dev": true,
- "dependencies": {
- "minimist": "^1.2.0"
- },
- "bin": {
- "json5": "lib/cli.js"
- }
- },
- "node_modules/jsonpath-plus": {
- "version": "8.1.0",
- "resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-8.1.0.tgz",
- "integrity": "sha512-qVTiuKztFGw0dGhYi3WNqvddx3/SHtyDT0xJaeyz4uP0d1tkpG+0y5uYQ4OcIo1TLAz3PE/qDOW9F0uDt3+CTw==",
- "bin": {
- "jsonpath": "bin/jsonpath-cli.js",
- "jsonpath-plus": "bin/jsonpath-cli.js"
- },
- "engines": {
- "node": ">=14.0.0"
- }
- },
- "node_modules/jsprim": {
- "version": "1.4.2",
- "resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.2.tgz",
- "integrity": "sha512-P2bSOMAc/ciLz6DzgjVlGJP9+BrJWu5UDGK70C2iweC5QBIeFf0ZXRvGjEj2uYgrY2MkAAhsSWHDWlFtEroZWw==",
- "dependencies": {
- "assert-plus": "1.0.0",
- "extsprintf": "1.3.0",
- "json-schema": "0.4.0",
- "verror": "1.10.0"
- },
- "engines": {
- "node": ">=0.6.0"
- }
- },
- "node_modules/jsx-ast-utils": {
- "version": "3.3.5",
- "resolved": "https://registry.npmjs.org/jsx-ast-utils/-/jsx-ast-utils-3.3.5.tgz",
- "integrity": "sha512-ZZow9HBI5O6EPgSJLUb8n2NKgmVWTwCvHGwFuJlMjvLFqlGG6pjirPhtdsseaLZjSibD8eegzmYpUZwoIlj2cQ==",
- "dev": true,
- "dependencies": {
- "array-includes": "^3.1.6",
- "array.prototype.flat": "^1.3.1",
- "object.assign": "^4.1.4",
- "object.values": "^1.1.6"
- },
- "engines": {
- "node": ">=4.0"
- }
- },
- "node_modules/keyv": {
- "version": "4.5.4",
- "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz",
- "integrity": "sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==",
- "dev": true,
- "dependencies": {
- "json-buffer": "3.0.1"
- }
- },
- "node_modules/kubernetes-resource-parser": {
- "version": "0.1.0",
- "resolved": "https://registry.npmjs.org/kubernetes-resource-parser/-/kubernetes-resource-parser-0.1.0.tgz",
- "integrity": "sha512-rr2K/4akDkY3oKgJ/KL3KAKw8Fb0VwBucGgKhvgqXluVhfn2BgEuJUXIDU+zt4eWaqOOjAC6ApUgnHF/SJ/iNw==",
- "license": "MIT"
- },
- "node_modules/levn": {
- "version": "0.4.1",
- "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz",
- "integrity": "sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==",
- "dev": true,
- "dependencies": {
- "prelude-ls": "^1.2.1",
- "type-check": "~0.4.0"
- },
- "engines": {
- "node": ">= 0.8.0"
- }
- },
- "node_modules/load-json-file": {
- "version": "5.3.0",
- "resolved": "https://registry.npmjs.org/load-json-file/-/load-json-file-5.3.0.tgz",
- "integrity": "sha512-cJGP40Jc/VXUsp8/OrnyKyTZ1y6v/dphm3bioS+RrKXjK2BB6wHUd6JptZEFDGgGahMT+InnZO5i1Ei9mpC8Bw==",
- "dev": true,
- "dependencies": {
- "graceful-fs": "^4.1.15",
- "parse-json": "^4.0.0",
- "pify": "^4.0.1",
- "strip-bom": "^3.0.0",
- "type-fest": "^0.3.0"
- },
- "engines": {
- "node": ">=6"
- }
- },
- "node_modules/load-json-file/node_modules/type-fest": {
- "version": "0.3.1",
- "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.3.1.tgz",
- "integrity": "sha512-cUGJnCdr4STbePCgqNFbpVNCepa+kAVohJs1sLhxzdH+gnEoOd8VhbYa7pD3zZYGiURWM2xzEII3fQcRizDkYQ==",
- "dev": true,
- "engines": {
- "node": ">=6"
- }
- },
- "node_modules/locate-path": {
- "version": "6.0.0",
- "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz",
- "integrity": "sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==",
- "dev": true,
- "dependencies": {
- "p-locate": "^5.0.0"
- },
- "engines": {
- "node": ">=10"
- },
- "funding": {
- "url": "https://github.com/sponsors/sindresorhus"
- }
- },
- "node_modules/lodash.merge": {
- "version": "4.6.2",
- "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz",
- "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==",
- "dev": true
- },
- "node_modules/loose-envify": {
- "version": "1.4.0",
- "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
- "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==",
- "dev": true,
- "dependencies": {
- "js-tokens": "^3.0.0 || ^4.0.0"
- },
- "bin": {
- "loose-envify": "cli.js"
- }
- },
- "node_modules/lru-cache": {
- "version": "6.0.0",
- "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz",
- "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==",
- "optional": true,
- "dependencies": {
- "yallist": "^4.0.0"
- },
- "engines": {
- "node": ">=10"
- }
- },
- "node_modules/mime-db": {
- "version": "1.52.0",
- "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
- "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
- "engines": {
- "node": ">= 0.6"
- }
- },
- "node_modules/mime-types": {
- "version": "2.1.35",
- "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
- "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
- "dependencies": {
- "mime-db": "1.52.0"
- },
- "engines": {
- "node": ">= 0.6"
- }
- },
- "node_modules/minimatch": {
- "version": "9.0.5",
- "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz",
- "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==",
- "dependencies": {
- "brace-expansion": "^2.0.1"
- },
- "engines": {
- "node": ">=16 || 14 >=14.17"
- },
- "funding": {
- "url": "https://github.com/sponsors/isaacs"
- }
- },
- "node_modules/minimist": {
- "version": "1.2.8",
- "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
- "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==",
- "dev": true,
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/minipass": {
- "version": "7.1.2",
- "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz",
- "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==",
- "engines": {
- "node": ">=16 || 14 >=14.17"
- }
- },
- "node_modules/minizlib": {
- "version": "3.0.1",
- "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.1.tgz",
- "integrity": "sha512-umcy022ILvb5/3Djuu8LWeqUa8D68JaBzlttKeMWen48SjabqS3iY5w/vzeMzMUNhLDifyhbOwKDSznB1vvrwg==",
- "dependencies": {
- "minipass": "^7.0.4",
- "rimraf": "^5.0.5"
- },
- "engines": {
- "node": ">= 18"
- }
- },
- "node_modules/mkdirp": {
- "version": "3.0.1",
- "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz",
- "integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==",
- "bin": {
- "mkdirp": "dist/cjs/src/bin.js"
- },
- "engines": {
- "node": ">=10"
- },
- "funding": {
- "url": "https://github.com/sponsors/isaacs"
- }
- },
- "node_modules/ms": {
- "version": "2.1.3",
- "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
- "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
- "dev": true
- },
- "node_modules/natural-compare": {
- "version": "1.4.0",
- "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz",
- "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==",
- "dev": true
- },
- "node_modules/oauth-sign": {
- "version": "0.9.0",
- "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.9.0.tgz",
- "integrity": "sha512-fexhUFFPTGV8ybAtSIGbV6gOkSv8UtRbDBnAyLQw4QPKkgNlsH2ByPGtMUqdWkos6YCRmAqViwgZrJc/mRDzZQ==",
- "engines": {
- "node": "*"
- }
- },
- "node_modules/object-assign": {
- "version": "4.1.1",
- "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
- "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==",
- "dev": true,
- "engines": {
- "node": ">=0.10.0"
- }
- },
- "node_modules/object-hash": {
- "version": "2.2.0",
- "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-2.2.0.tgz",
- "integrity": "sha512-gScRMn0bS5fH+IuwyIFgnh9zBdo4DV+6GhygmWM9HyNJSgS0hScp1f5vjtm7oIIOiT9trXrShAkLFSc2IqKNgw==",
- "optional": true,
- "engines": {
- "node": ">= 6"
- }
- },
- "node_modules/object-inspect": {
- "version": "1.13.2",
- "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.2.tgz",
- "integrity": "sha512-IRZSRuzJiynemAXPYtPe5BoI/RESNYR7TYm50MC5Mqbd3Jmw5y790sErYw3V6SryFJD64b74qQQs9wn5Bg/k3g==",
- "dev": true,
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/object-keys": {
- "version": "1.1.1",
- "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz",
- "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==",
- "dev": true,
- "engines": {
- "node": ">= 0.4"
- }
- },
- "node_modules/object.assign": {
- "version": "4.1.5",
- "resolved": "https://registry.npmjs.org/object.assign/-/object.assign-4.1.5.tgz",
- "integrity": "sha512-byy+U7gp+FVwmyzKPYhW2h5l3crpmGsxl7X2s8y43IgxvG4g3QZ6CffDtsNQy1WsmZpQbO+ybo0AlW7TY6DcBQ==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.5",
- "define-properties": "^1.2.1",
- "has-symbols": "^1.0.3",
- "object-keys": "^1.1.1"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/object.entries": {
- "version": "1.1.8",
- "resolved": "https://registry.npmjs.org/object.entries/-/object.entries-1.1.8.tgz",
- "integrity": "sha512-cmopxi8VwRIAw/fkijJohSfpef5PdN0pMQJN6VC/ZKvn0LIknWD8KtgY6KlQdEc4tIjcQ3HxSMmnvtzIscdaYQ==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "define-properties": "^1.2.1",
- "es-object-atoms": "^1.0.0"
- },
- "engines": {
- "node": ">= 0.4"
- }
- },
- "node_modules/object.fromentries": {
- "version": "2.0.8",
- "resolved": "https://registry.npmjs.org/object.fromentries/-/object.fromentries-2.0.8.tgz",
- "integrity": "sha512-k6E21FzySsSK5a21KRADBd/NGneRegFO5pLHfdQLpRDETUNJueLXs3WCzyQ3tFRDYgbq3KHGXfTbi2bs8WQ6rQ==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "define-properties": "^1.2.1",
- "es-abstract": "^1.23.2",
- "es-object-atoms": "^1.0.0"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/object.groupby": {
- "version": "1.0.3",
- "resolved": "https://registry.npmjs.org/object.groupby/-/object.groupby-1.0.3.tgz",
- "integrity": "sha512-+Lhy3TQTuzXI5hevh8sBGqbmurHbbIjAi0Z4S63nthVLmLxfbj4T54a4CfZrXIrt9iP4mVAPYMo/v99taj3wjQ==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "define-properties": "^1.2.1",
- "es-abstract": "^1.23.2"
- },
- "engines": {
- "node": ">= 0.4"
- }
- },
- "node_modules/object.values": {
- "version": "1.2.0",
- "resolved": "https://registry.npmjs.org/object.values/-/object.values-1.2.0.tgz",
- "integrity": "sha512-yBYjY9QX2hnRmZHAjG/f13MzmBzxzYgQhFrke06TTyKY5zSTEqkOeukBzIdVA3j3ulu8Qa3MbVFShV7T2RmGtQ==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "define-properties": "^1.2.1",
- "es-object-atoms": "^1.0.0"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/oidc-token-hash": {
- "version": "5.0.3",
- "resolved": "https://registry.npmjs.org/oidc-token-hash/-/oidc-token-hash-5.0.3.tgz",
- "integrity": "sha512-IF4PcGgzAr6XXSff26Sk/+P4KZFJVuHAJZj3wgO3vX2bMdNVp/QXTP3P7CEm9V1IdG8lDLY3HhiqpsE/nOwpPw==",
- "optional": true,
- "engines": {
- "node": "^10.13.0 || >=12.0.0"
- }
- },
- "node_modules/once": {
- "version": "1.4.0",
- "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
- "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
- "dev": true,
- "dependencies": {
- "wrappy": "1"
- }
- },
- "node_modules/openid-client": {
- "version": "5.6.5",
- "resolved": "https://registry.npmjs.org/openid-client/-/openid-client-5.6.5.tgz",
- "integrity": "sha512-5P4qO9nGJzB5PI0LFlhj4Dzg3m4odt0qsJTfyEtZyOlkgpILwEioOhVVJOrS1iVH494S4Ee5OCjjg6Bf5WOj3w==",
- "optional": true,
- "dependencies": {
- "jose": "^4.15.5",
- "lru-cache": "^6.0.0",
- "object-hash": "^2.2.0",
- "oidc-token-hash": "^5.0.3"
- },
- "funding": {
- "url": "https://github.com/sponsors/panva"
- }
- },
- "node_modules/optionator": {
- "version": "0.9.4",
- "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz",
- "integrity": "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==",
- "dev": true,
- "dependencies": {
- "deep-is": "^0.1.3",
- "fast-levenshtein": "^2.0.6",
- "levn": "^0.4.1",
- "prelude-ls": "^1.2.1",
- "type-check": "^0.4.0",
- "word-wrap": "^1.2.5"
- },
- "engines": {
- "node": ">= 0.8.0"
- }
- },
- "node_modules/p-limit": {
- "version": "3.1.0",
- "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz",
- "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==",
- "dev": true,
- "dependencies": {
- "yocto-queue": "^0.1.0"
- },
- "engines": {
- "node": ">=10"
- },
- "funding": {
- "url": "https://github.com/sponsors/sindresorhus"
- }
- },
- "node_modules/p-locate": {
- "version": "5.0.0",
- "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-5.0.0.tgz",
- "integrity": "sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==",
- "dev": true,
- "dependencies": {
- "p-limit": "^3.0.2"
- },
- "engines": {
- "node": ">=10"
- },
- "funding": {
- "url": "https://github.com/sponsors/sindresorhus"
- }
- },
- "node_modules/p-try": {
- "version": "2.2.0",
- "resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz",
- "integrity": "sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==",
- "dev": true,
- "engines": {
- "node": ">=6"
- }
- },
- "node_modules/package-json-from-dist": {
- "version": "1.0.0",
- "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.0.tgz",
- "integrity": "sha512-dATvCeZN/8wQsGywez1mzHtTlP22H8OEfPrVMLNr4/eGa+ijtLn/6M5f0dY8UKNrC2O9UCU6SSoG3qRKnt7STw=="
- },
- "node_modules/parent-module": {
- "version": "1.0.1",
- "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz",
- "integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==",
- "dev": true,
- "dependencies": {
- "callsites": "^3.0.0"
- },
- "engines": {
- "node": ">=6"
- }
- },
- "node_modules/parse-json": {
- "version": "4.0.0",
- "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-4.0.0.tgz",
- "integrity": "sha512-aOIos8bujGN93/8Ox/jPLh7RwVnPEysynVFE+fQZyg6jKELEHwzgKdLRFHUgXJL6kylijVSBC4BvN9OmsB48Rw==",
- "dev": true,
- "dependencies": {
- "error-ex": "^1.3.1",
- "json-parse-better-errors": "^1.0.1"
- },
- "engines": {
- "node": ">=4"
- }
- },
- "node_modules/path-exists": {
- "version": "4.0.0",
- "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
- "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==",
- "dev": true,
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/path-is-absolute": {
- "version": "1.0.1",
- "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
- "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==",
- "dev": true,
- "engines": {
- "node": ">=0.10.0"
- }
- },
- "node_modules/path-key": {
- "version": "3.1.1",
- "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz",
- "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==",
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/path-parse": {
- "version": "1.0.7",
- "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz",
- "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==",
- "dev": true
- },
- "node_modules/path-scurry": {
- "version": "1.11.1",
- "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz",
- "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==",
- "dependencies": {
- "lru-cache": "^10.2.0",
- "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0"
- },
- "engines": {
- "node": ">=16 || 14 >=14.18"
- },
- "funding": {
- "url": "https://github.com/sponsors/isaacs"
- }
- },
- "node_modules/path-scurry/node_modules/lru-cache": {
- "version": "10.4.3",
- "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz",
- "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ=="
- },
- "node_modules/performance-now": {
- "version": "2.1.0",
- "resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz",
- "integrity": "sha512-7EAHlyLHI56VEIdK57uwHdHKIaAGbnXPiw0yWbarQZOKaKpvUIgW0jWRVLiatnM+XXlSwsanIBH/hzGMJulMow=="
- },
- "node_modules/pify": {
- "version": "4.0.1",
- "resolved": "https://registry.npmjs.org/pify/-/pify-4.0.1.tgz",
- "integrity": "sha512-uB80kBFb/tfd68bVleG9T5GGsGPjJrLAUpR5PZIrhBnIaRTQRjqdJSsIKkOP6OAIFbj7GOrcudc5pNjZ+geV2g==",
- "dev": true,
- "engines": {
- "node": ">=6"
- }
- },
- "node_modules/pkg-conf": {
- "version": "3.1.0",
- "resolved": "https://registry.npmjs.org/pkg-conf/-/pkg-conf-3.1.0.tgz",
- "integrity": "sha512-m0OTbR/5VPNPqO1ph6Fqbj7Hv6QU7gR/tQW40ZqrL1rjgCU85W6C1bJn0BItuJqnR98PWzw7Z8hHeChD1WrgdQ==",
- "dev": true,
- "dependencies": {
- "find-up": "^3.0.0",
- "load-json-file": "^5.2.0"
- },
- "engines": {
- "node": ">=6"
- }
- },
- "node_modules/pkg-conf/node_modules/find-up": {
- "version": "3.0.0",
- "resolved": "https://registry.npmjs.org/find-up/-/find-up-3.0.0.tgz",
- "integrity": "sha512-1yD6RmLI1XBfxugvORwlck6f75tYL+iR0jqwsOrOxMZyGYqUuDhJ0l4AXdO1iX/FTs9cBAMEk1gWSEx1kSbylg==",
- "dev": true,
- "dependencies": {
- "locate-path": "^3.0.0"
- },
- "engines": {
- "node": ">=6"
- }
- },
- "node_modules/pkg-conf/node_modules/locate-path": {
- "version": "3.0.0",
- "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-3.0.0.tgz",
- "integrity": "sha512-7AO748wWnIhNqAuaty2ZWHkQHRSNfPVIsPIfwEOWO22AmaoVrWavlOcMR5nzTLNYvp36X220/maaRsrec1G65A==",
- "dev": true,
- "dependencies": {
- "p-locate": "^3.0.0",
- "path-exists": "^3.0.0"
- },
- "engines": {
- "node": ">=6"
- }
- },
- "node_modules/pkg-conf/node_modules/p-limit": {
- "version": "2.3.0",
- "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz",
- "integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==",
- "dev": true,
- "dependencies": {
- "p-try": "^2.0.0"
- },
- "engines": {
- "node": ">=6"
- },
- "funding": {
- "url": "https://github.com/sponsors/sindresorhus"
- }
- },
- "node_modules/pkg-conf/node_modules/p-locate": {
- "version": "3.0.0",
- "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-3.0.0.tgz",
- "integrity": "sha512-x+12w/To+4GFfgJhBEpiDcLozRJGegY+Ei7/z0tSLkMmxGZNybVMSfWj9aJn8Z5Fc7dBUNJOOVgPv2H7IwulSQ==",
- "dev": true,
- "dependencies": {
- "p-limit": "^2.0.0"
- },
- "engines": {
- "node": ">=6"
- }
- },
- "node_modules/pkg-conf/node_modules/path-exists": {
- "version": "3.0.0",
- "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz",
- "integrity": "sha512-bpC7GYwiDYQ4wYLe+FA8lhRjhQCMcQGuSgGGqDkg/QerRWw9CmGRT0iSOVRSZJ29NMLZgIzqaljJ63oaL4NIJQ==",
- "dev": true,
- "engines": {
- "node": ">=4"
- }
- },
- "node_modules/possible-typed-array-names": {
- "version": "1.0.0",
- "resolved": "https://registry.npmjs.org/possible-typed-array-names/-/possible-typed-array-names-1.0.0.tgz",
- "integrity": "sha512-d7Uw+eZoloe0EHDIYoe+bQ5WXnGMOpmiZFTuMWCwpjzzkL2nTjcKiAk4hh8TjnGye2TwWOk3UXucZ+3rbmBa8Q==",
- "dev": true,
- "engines": {
- "node": ">= 0.4"
- }
- },
- "node_modules/prelude-ls": {
- "version": "1.2.1",
- "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz",
- "integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==",
- "dev": true,
- "engines": {
- "node": ">= 0.8.0"
- }
- },
- "node_modules/prop-types": {
- "version": "15.8.1",
- "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz",
- "integrity": "sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==",
- "dev": true,
- "dependencies": {
- "loose-envify": "^1.4.0",
- "object-assign": "^4.1.1",
- "react-is": "^16.13.1"
- }
- },
- "node_modules/psl": {
- "version": "1.9.0",
- "resolved": "https://registry.npmjs.org/psl/-/psl-1.9.0.tgz",
- "integrity": "sha512-E/ZsdU4HLs/68gYzgGTkMicWTLPdAftJLfJFlLUAAKZGkStNU72sZjT66SnMDVOfOWY/YAoiD7Jxa9iHvngcag=="
- },
- "node_modules/punycode": {
- "version": "2.3.1",
- "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
- "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==",
- "engines": {
- "node": ">=6"
- }
- },
- "node_modules/qs": {
- "version": "6.5.3",
- "resolved": "https://registry.npmjs.org/qs/-/qs-6.5.3.tgz",
- "integrity": "sha512-qxXIEh4pCGfHICj1mAJQ2/2XVZkjCDTcEgfoSQxc/fYivUZxTkk7L3bDBJSoNrEzXI17oUO5Dp07ktqE5KzczA==",
- "engines": {
- "node": ">=0.6"
- }
- },
- "node_modules/queue-microtask": {
- "version": "1.2.3",
- "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz",
- "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==",
- "dev": true,
- "funding": [
- {
- "type": "github",
- "url": "https://github.com/sponsors/feross"
- },
- {
- "type": "patreon",
- "url": "https://www.patreon.com/feross"
- },
- {
- "type": "consulting",
- "url": "https://feross.org/support"
- }
- ]
- },
- "node_modules/react-is": {
- "version": "16.13.1",
- "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz",
- "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==",
- "dev": true
- },
- "node_modules/reflect.getprototypeof": {
- "version": "1.0.6",
- "resolved": "https://registry.npmjs.org/reflect.getprototypeof/-/reflect.getprototypeof-1.0.6.tgz",
- "integrity": "sha512-fmfw4XgoDke3kdI6h4xcUz1dG8uaiv5q9gcEwLS4Pnth2kxT+GZ7YehS1JTMGBQmtV7Y4GFGbs2re2NqhdozUg==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "define-properties": "^1.2.1",
- "es-abstract": "^1.23.1",
- "es-errors": "^1.3.0",
- "get-intrinsic": "^1.2.4",
- "globalthis": "^1.0.3",
- "which-builtin-type": "^1.1.3"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/regexp.prototype.flags": {
- "version": "1.5.2",
- "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.2.tgz",
- "integrity": "sha512-NcDiDkTLuPR+++OCKB0nWafEmhg/Da8aUPLPMQbK+bxKKCm1/S5he+AqYa4PlMCVBalb4/yxIRub6qkEx5yJbw==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.6",
- "define-properties": "^1.2.1",
- "es-errors": "^1.3.0",
- "set-function-name": "^2.0.1"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/regexpp": {
- "version": "3.2.0",
- "resolved": "https://registry.npmjs.org/regexpp/-/regexpp-3.2.0.tgz",
- "integrity": "sha512-pq2bWo9mVD43nbts2wGv17XLiNLya+GklZ8kaDLV2Z08gDCsGpnKn9BFMepvWuHCbyVvY7J5o5+BVvoQbmlJLg==",
- "dev": true,
- "engines": {
- "node": ">=8"
- },
- "funding": {
- "url": "https://github.com/sponsors/mysticatea"
- }
- },
- "node_modules/request": {
- "version": "2.88.2",
- "resolved": "https://registry.npmjs.org/request/-/request-2.88.2.tgz",
- "integrity": "sha512-MsvtOrfG9ZcrOwAW+Qi+F6HbD0CWXEh9ou77uOb7FM2WPhwT7smM833PzanhJLsgXjN89Ir6V2PczXNnMpwKhw==",
- "deprecated": "request has been deprecated, see https://github.com/request/request/issues/3142",
- "dependencies": {
- "aws-sign2": "~0.7.0",
- "aws4": "^1.8.0",
- "caseless": "~0.12.0",
- "combined-stream": "~1.0.6",
- "extend": "~3.0.2",
- "forever-agent": "~0.6.1",
- "form-data": "~2.3.2",
- "har-validator": "~5.1.3",
- "http-signature": "~1.2.0",
- "is-typedarray": "~1.0.0",
- "isstream": "~0.1.2",
- "json-stringify-safe": "~5.0.1",
- "mime-types": "~2.1.19",
- "oauth-sign": "~0.9.0",
- "performance-now": "^2.1.0",
- "qs": "~6.5.2",
- "safe-buffer": "^5.1.2",
- "tough-cookie": "~2.5.0",
- "tunnel-agent": "^0.6.0",
- "uuid": "^3.3.2"
- },
- "engines": {
- "node": ">= 6"
- }
- },
- "node_modules/request/node_modules/form-data": {
- "version": "2.3.3",
- "resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.3.tgz",
- "integrity": "sha512-1lLKB2Mu3aGP1Q/2eCOx0fNbRMe7XdwktwOruhfqqd0rIJWwN4Dh+E3hrPSlDCXnSR7UtZ1N38rVXm+6+MEhJQ==",
- "dependencies": {
- "asynckit": "^0.4.0",
- "combined-stream": "^1.0.6",
- "mime-types": "^2.1.12"
- },
- "engines": {
- "node": ">= 0.12"
- }
- },
- "node_modules/resolve": {
- "version": "1.22.8",
- "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.8.tgz",
- "integrity": "sha512-oKWePCxqpd6FlLvGV1VU0x7bkPmmCNolxzjMf4NczoDnQcIWrAF+cPtZn5i6n+RfD2d9i0tzpKnG6Yk168yIyw==",
- "dev": true,
- "dependencies": {
- "is-core-module": "^2.13.0",
- "path-parse": "^1.0.7",
- "supports-preserve-symlinks-flag": "^1.0.0"
- },
- "bin": {
- "resolve": "bin/resolve"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/resolve-from": {
- "version": "4.0.0",
- "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz",
- "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==",
- "dev": true,
- "engines": {
- "node": ">=4"
- }
- },
- "node_modules/reusify": {
- "version": "1.0.4",
- "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz",
- "integrity": "sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==",
- "dev": true,
- "engines": {
- "iojs": ">=1.0.0",
- "node": ">=0.10.0"
- }
- },
- "node_modules/rfc4648": {
- "version": "1.5.3",
- "resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.3.tgz",
- "integrity": "sha512-MjOWxM065+WswwnmNONOT+bD1nXzY9Km6u3kzvnx8F8/HXGZdz3T6e6vZJ8Q/RIMUSp/nxqjH3GwvJDy8ijeQQ=="
- },
- "node_modules/rimraf": {
- "version": "5.0.10",
- "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-5.0.10.tgz",
- "integrity": "sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ==",
- "dependencies": {
- "glob": "^10.3.7"
- },
- "bin": {
- "rimraf": "dist/esm/bin.mjs"
- },
- "funding": {
- "url": "https://github.com/sponsors/isaacs"
- }
- },
- "node_modules/run-parallel": {
- "version": "1.2.0",
- "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz",
- "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==",
- "dev": true,
- "funding": [
- {
- "type": "github",
- "url": "https://github.com/sponsors/feross"
- },
- {
- "type": "patreon",
- "url": "https://www.patreon.com/feross"
- },
- {
- "type": "consulting",
- "url": "https://feross.org/support"
- }
- ],
- "dependencies": {
- "queue-microtask": "^1.2.2"
- }
- },
- "node_modules/safe-array-concat": {
- "version": "1.1.2",
- "resolved": "https://registry.npmjs.org/safe-array-concat/-/safe-array-concat-1.1.2.tgz",
- "integrity": "sha512-vj6RsCsWBCf19jIeHEfkRMw8DPiBb+DMXklQ/1SGDHOMlHdPUkZXFQ2YdplS23zESTijAcurb1aSgJA3AgMu1Q==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "get-intrinsic": "^1.2.4",
- "has-symbols": "^1.0.3",
- "isarray": "^2.0.5"
- },
- "engines": {
- "node": ">=0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/safe-buffer": {
- "version": "5.2.1",
- "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
- "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
- "funding": [
- {
- "type": "github",
- "url": "https://github.com/sponsors/feross"
- },
- {
- "type": "patreon",
- "url": "https://www.patreon.com/feross"
- },
- {
- "type": "consulting",
- "url": "https://feross.org/support"
- }
- ]
- },
- "node_modules/safe-regex-test": {
- "version": "1.0.3",
- "resolved": "https://registry.npmjs.org/safe-regex-test/-/safe-regex-test-1.0.3.tgz",
- "integrity": "sha512-CdASjNJPvRa7roO6Ra/gLYBTzYzzPyyBXxIMdGW3USQLyjWEls2RgW5UBTXaQVp+OrpeCK3bLem8smtmheoRuw==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.6",
- "es-errors": "^1.3.0",
- "is-regex": "^1.1.4"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/safer-buffer": {
- "version": "2.1.2",
- "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
- "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="
- },
- "node_modules/semver": {
- "version": "6.3.1",
- "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz",
- "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==",
- "dev": true,
- "bin": {
- "semver": "bin/semver.js"
- }
- },
- "node_modules/set-function-length": {
- "version": "1.2.2",
- "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz",
- "integrity": "sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==",
- "dev": true,
- "dependencies": {
- "define-data-property": "^1.1.4",
- "es-errors": "^1.3.0",
- "function-bind": "^1.1.2",
- "get-intrinsic": "^1.2.4",
- "gopd": "^1.0.1",
- "has-property-descriptors": "^1.0.2"
- },
- "engines": {
- "node": ">= 0.4"
- }
- },
- "node_modules/set-function-name": {
- "version": "2.0.2",
- "resolved": "https://registry.npmjs.org/set-function-name/-/set-function-name-2.0.2.tgz",
- "integrity": "sha512-7PGFlmtwsEADb0WYyvCMa1t+yke6daIG4Wirafur5kcf+MhUnPms1UeR0CKQdTZD81yESwMHbtn+TR+dMviakQ==",
- "dev": true,
- "dependencies": {
- "define-data-property": "^1.1.4",
- "es-errors": "^1.3.0",
- "functions-have-names": "^1.2.3",
- "has-property-descriptors": "^1.0.2"
- },
- "engines": {
- "node": ">= 0.4"
- }
- },
- "node_modules/shebang-command": {
- "version": "2.0.0",
- "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
- "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==",
- "dependencies": {
- "shebang-regex": "^3.0.0"
- },
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/shebang-regex": {
- "version": "3.0.0",
- "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz",
- "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==",
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/side-channel": {
- "version": "1.0.6",
- "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.6.tgz",
- "integrity": "sha512-fDW/EZ6Q9RiO8eFG8Hj+7u/oW+XrPTIChwCOM2+th2A6OblDtYYIpve9m+KvI9Z4C9qSEXlaGR6bTEYHReuglA==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "es-errors": "^1.3.0",
- "get-intrinsic": "^1.2.4",
- "object-inspect": "^1.13.1"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/signal-exit": {
- "version": "4.1.0",
- "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz",
- "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==",
- "engines": {
- "node": ">=14"
- },
- "funding": {
- "url": "https://github.com/sponsors/isaacs"
- }
- },
- "node_modules/sshpk": {
- "version": "1.18.0",
- "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.18.0.tgz",
- "integrity": "sha512-2p2KJZTSqQ/I3+HX42EpYOa2l3f8Erv8MWKsy2I9uf4wA7yFIkXRffYdsx86y6z4vHtV8u7g+pPlr8/4ouAxsQ==",
- "dependencies": {
- "asn1": "~0.2.3",
- "assert-plus": "^1.0.0",
- "bcrypt-pbkdf": "^1.0.0",
- "dashdash": "^1.12.0",
- "ecc-jsbn": "~0.1.1",
- "getpass": "^0.1.1",
- "jsbn": "~0.1.0",
- "safer-buffer": "^2.0.2",
- "tweetnacl": "~0.14.0"
- },
- "bin": {
- "sshpk-conv": "bin/sshpk-conv",
- "sshpk-sign": "bin/sshpk-sign",
- "sshpk-verify": "bin/sshpk-verify"
- },
- "engines": {
- "node": ">=0.10.0"
- }
- },
- "node_modules/standard": {
- "version": "17.1.2",
- "resolved": "https://registry.npmjs.org/standard/-/standard-17.1.2.tgz",
- "integrity": "sha512-WLm12WoXveKkvnPnPnaFUUHuOB2cUdAsJ4AiGHL2G0UNMrcRAWY2WriQaV8IQ3oRmYr0AWUbLNr94ekYFAHOrA==",
- "dev": true,
- "funding": [
- {
- "type": "github",
- "url": "https://github.com/sponsors/feross"
- },
- {
- "type": "patreon",
- "url": "https://www.patreon.com/feross"
- },
- {
- "type": "consulting",
- "url": "https://feross.org/support"
- }
- ],
- "dependencies": {
- "eslint": "^8.41.0",
- "eslint-config-standard": "17.1.0",
- "eslint-config-standard-jsx": "^11.0.0",
- "eslint-plugin-import": "^2.27.5",
- "eslint-plugin-n": "^15.7.0",
- "eslint-plugin-promise": "^6.1.1",
- "eslint-plugin-react": "^7.36.1",
- "standard-engine": "^15.1.0",
- "version-guard": "^1.1.1"
- },
- "bin": {
- "standard": "bin/cmd.cjs"
- },
- "engines": {
- "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
- }
- },
- "node_modules/standard-engine": {
- "version": "15.1.0",
- "resolved": "https://registry.npmjs.org/standard-engine/-/standard-engine-15.1.0.tgz",
- "integrity": "sha512-VHysfoyxFu/ukT+9v49d4BRXIokFRZuH3z1VRxzFArZdjSCFpro6rEIU3ji7e4AoAtuSfKBkiOmsrDqKW5ZSRw==",
- "dev": true,
- "funding": [
- {
- "type": "github",
- "url": "https://github.com/sponsors/feross"
- },
- {
- "type": "patreon",
- "url": "https://www.patreon.com/feross"
- },
- {
- "type": "consulting",
- "url": "https://feross.org/support"
- }
- ],
- "dependencies": {
- "get-stdin": "^8.0.0",
- "minimist": "^1.2.6",
- "pkg-conf": "^3.1.0",
- "xdg-basedir": "^4.0.0"
- },
- "engines": {
- "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
- }
- },
- "node_modules/stream-buffers": {
- "version": "3.0.3",
- "resolved": "https://registry.npmjs.org/stream-buffers/-/stream-buffers-3.0.3.tgz",
- "integrity": "sha512-pqMqwQCso0PBJt2PQmDO0cFj0lyqmiwOMiMSkVtRokl7e+ZTRYgDHKnuZNbqjiJXgsg4nuqtD/zxuo9KqTp0Yw==",
- "engines": {
- "node": ">= 0.10.0"
- }
- },
- "node_modules/string-width": {
- "version": "5.1.2",
- "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz",
- "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==",
- "dependencies": {
- "eastasianwidth": "^0.2.0",
- "emoji-regex": "^9.2.2",
- "strip-ansi": "^7.0.1"
- },
- "engines": {
- "node": ">=12"
- },
- "funding": {
- "url": "https://github.com/sponsors/sindresorhus"
- }
- },
- "node_modules/string-width-cjs": {
- "name": "string-width",
- "version": "4.2.3",
- "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
- "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
- "dependencies": {
- "emoji-regex": "^8.0.0",
- "is-fullwidth-code-point": "^3.0.0",
- "strip-ansi": "^6.0.1"
- },
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/string-width-cjs/node_modules/ansi-regex": {
- "version": "5.0.1",
- "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
- "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/string-width-cjs/node_modules/emoji-regex": {
- "version": "8.0.0",
- "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
- "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="
- },
- "node_modules/string-width-cjs/node_modules/strip-ansi": {
- "version": "6.0.1",
- "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
- "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
- "dependencies": {
- "ansi-regex": "^5.0.1"
- },
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/string.prototype.matchall": {
- "version": "4.0.11",
- "resolved": "https://registry.npmjs.org/string.prototype.matchall/-/string.prototype.matchall-4.0.11.tgz",
- "integrity": "sha512-NUdh0aDavY2og7IbBPenWqR9exH+E26Sv8e0/eTe1tltDGZL+GtBkDAnnyBtmekfK6/Dq3MkcGtzXFEd1LQrtg==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "define-properties": "^1.2.1",
- "es-abstract": "^1.23.2",
- "es-errors": "^1.3.0",
- "es-object-atoms": "^1.0.0",
- "get-intrinsic": "^1.2.4",
- "gopd": "^1.0.1",
- "has-symbols": "^1.0.3",
- "internal-slot": "^1.0.7",
- "regexp.prototype.flags": "^1.5.2",
- "set-function-name": "^2.0.2",
- "side-channel": "^1.0.6"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/string.prototype.repeat": {
- "version": "1.0.0",
- "resolved": "https://registry.npmjs.org/string.prototype.repeat/-/string.prototype.repeat-1.0.0.tgz",
- "integrity": "sha512-0u/TldDbKD8bFCQ/4f5+mNRrXwZ8hg2w7ZR8wa16e8z9XpePWl3eGEcUD0OXpEH/VJH/2G3gjUtR3ZOiBe2S/w==",
- "dev": true,
- "dependencies": {
- "define-properties": "^1.1.3",
- "es-abstract": "^1.17.5"
- }
- },
- "node_modules/string.prototype.trim": {
- "version": "1.2.9",
- "resolved": "https://registry.npmjs.org/string.prototype.trim/-/string.prototype.trim-1.2.9.tgz",
- "integrity": "sha512-klHuCNxiMZ8MlsOihJhJEBJAiMVqU3Z2nEXWfWnIqjN0gEFS9J9+IxKozWWtQGcgoa1WUZzLjKPTr4ZHNFTFxw==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "define-properties": "^1.2.1",
- "es-abstract": "^1.23.0",
- "es-object-atoms": "^1.0.0"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/string.prototype.trimend": {
- "version": "1.0.8",
- "resolved": "https://registry.npmjs.org/string.prototype.trimend/-/string.prototype.trimend-1.0.8.tgz",
- "integrity": "sha512-p73uL5VCHCO2BZZ6krwwQE3kCzM7NKmis8S//xEC6fQonchbum4eP6kR4DLEjQFO3Wnj3Fuo8NM0kOSjVdHjZQ==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "define-properties": "^1.2.1",
- "es-object-atoms": "^1.0.0"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/string.prototype.trimstart": {
- "version": "1.0.8",
- "resolved": "https://registry.npmjs.org/string.prototype.trimstart/-/string.prototype.trimstart-1.0.8.tgz",
- "integrity": "sha512-UXSH262CSZY1tfu3G3Secr6uGLCFVPMhIqHjlgCUtCCcgihYc/xKs9djMTMUOb2j1mVSeU8EU6NWc/iQKU6Gfg==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "define-properties": "^1.2.1",
- "es-object-atoms": "^1.0.0"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/strip-ansi": {
- "version": "7.1.0",
- "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz",
- "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==",
- "dependencies": {
- "ansi-regex": "^6.0.1"
- },
- "engines": {
- "node": ">=12"
- },
- "funding": {
- "url": "https://github.com/chalk/strip-ansi?sponsor=1"
- }
- },
- "node_modules/strip-ansi-cjs": {
- "name": "strip-ansi",
- "version": "6.0.1",
- "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
- "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
- "dependencies": {
- "ansi-regex": "^5.0.1"
- },
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/strip-ansi-cjs/node_modules/ansi-regex": {
- "version": "5.0.1",
- "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
- "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/strip-bom": {
- "version": "3.0.0",
- "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz",
- "integrity": "sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA==",
- "dev": true,
- "engines": {
- "node": ">=4"
- }
- },
- "node_modules/strip-json-comments": {
- "version": "3.1.1",
- "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz",
- "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==",
- "dev": true,
- "engines": {
- "node": ">=8"
- },
- "funding": {
- "url": "https://github.com/sponsors/sindresorhus"
- }
- },
- "node_modules/supports-color": {
- "version": "7.2.0",
- "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz",
- "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==",
- "dev": true,
- "dependencies": {
- "has-flag": "^4.0.0"
- },
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/supports-preserve-symlinks-flag": {
- "version": "1.0.0",
- "resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz",
- "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==",
- "dev": true,
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/tar": {
- "version": "7.4.3",
- "resolved": "https://registry.npmjs.org/tar/-/tar-7.4.3.tgz",
- "integrity": "sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==",
- "dependencies": {
- "@isaacs/fs-minipass": "^4.0.0",
- "chownr": "^3.0.0",
- "minipass": "^7.1.2",
- "minizlib": "^3.0.1",
- "mkdirp": "^3.0.1",
- "yallist": "^5.0.0"
- },
- "engines": {
- "node": ">=18"
- }
- },
- "node_modules/tar/node_modules/yallist": {
- "version": "5.0.0",
- "resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz",
- "integrity": "sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==",
- "engines": {
- "node": ">=18"
- }
- },
- "node_modules/text-table": {
- "version": "0.2.0",
- "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz",
- "integrity": "sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==",
- "dev": true
- },
- "node_modules/tough-cookie": {
- "version": "2.5.0",
- "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.5.0.tgz",
- "integrity": "sha512-nlLsUzgm1kfLXSXfRZMc1KLAugd4hqJHDTvc2hDIwS3mZAfMEuMbc03SujMF+GEcpaX/qboeycw6iO8JwVv2+g==",
- "dependencies": {
- "psl": "^1.1.28",
- "punycode": "^2.1.1"
- },
- "engines": {
- "node": ">=0.8"
- }
- },
- "node_modules/tsconfig-paths": {
- "version": "3.15.0",
- "resolved": "https://registry.npmjs.org/tsconfig-paths/-/tsconfig-paths-3.15.0.tgz",
- "integrity": "sha512-2Ac2RgzDe/cn48GvOe3M+o82pEFewD3UPbyoUHHdKasHwJKjds4fLXWf/Ux5kATBKN20oaFGu+jbElp1pos0mg==",
- "dev": true,
- "dependencies": {
- "@types/json5": "^0.0.29",
- "json5": "^1.0.2",
- "minimist": "^1.2.6",
- "strip-bom": "^3.0.0"
- }
- },
- "node_modules/tslib": {
- "version": "2.7.0",
- "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.7.0.tgz",
- "integrity": "sha512-gLXCKdN1/j47AiHiOkJN69hJmcbGTHI0ImLmbYLHykhgeN0jVGola9yVjFgzCUklsZQMW55o+dW7IXv3RCXDzA=="
- },
- "node_modules/tunnel-agent": {
- "version": "0.6.0",
- "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
- "integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==",
- "dependencies": {
- "safe-buffer": "^5.0.1"
- },
- "engines": {
- "node": "*"
- }
- },
- "node_modules/tweetnacl": {
- "version": "0.14.5",
- "resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz",
- "integrity": "sha512-KXXFFdAbFXY4geFIwoyNK+f5Z1b7swfXABfL7HXCmoIWMKU3dmS26672A4EeQtDzLKy7SXmfBu51JolvEKwtGA=="
- },
- "node_modules/type-check": {
- "version": "0.4.0",
- "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz",
- "integrity": "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==",
- "dev": true,
- "dependencies": {
- "prelude-ls": "^1.2.1"
- },
- "engines": {
- "node": ">= 0.8.0"
- }
- },
- "node_modules/type-fest": {
- "version": "0.20.2",
- "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.20.2.tgz",
- "integrity": "sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==",
- "dev": true,
- "engines": {
- "node": ">=10"
- },
- "funding": {
- "url": "https://github.com/sponsors/sindresorhus"
- }
- },
- "node_modules/typed-array-buffer": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/typed-array-buffer/-/typed-array-buffer-1.0.2.tgz",
- "integrity": "sha512-gEymJYKZtKXzzBzM4jqa9w6Q1Jjm7x2d+sh19AdsD4wqnMPDYyvwpsIc2Q/835kHuo3BEQ7CjelGhfTsoBb2MQ==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "es-errors": "^1.3.0",
- "is-typed-array": "^1.1.13"
- },
- "engines": {
- "node": ">= 0.4"
- }
- },
- "node_modules/typed-array-byte-length": {
- "version": "1.0.1",
- "resolved": "https://registry.npmjs.org/typed-array-byte-length/-/typed-array-byte-length-1.0.1.tgz",
- "integrity": "sha512-3iMJ9q0ao7WE9tWcaYKIptkNBuOIcZCCT0d4MRvuuH88fEoEH62IuQe0OtraD3ebQEoTRk8XCBoknUNc1Y67pw==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "for-each": "^0.3.3",
- "gopd": "^1.0.1",
- "has-proto": "^1.0.3",
- "is-typed-array": "^1.1.13"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/typed-array-byte-offset": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/typed-array-byte-offset/-/typed-array-byte-offset-1.0.2.tgz",
- "integrity": "sha512-Ous0vodHa56FviZucS2E63zkgtgrACj7omjwd/8lTEMEPFFyjfixMZ1ZXenpgCFBBt4EC1J2XsyVS2gkG0eTFA==",
- "dev": true,
- "dependencies": {
- "available-typed-arrays": "^1.0.7",
- "call-bind": "^1.0.7",
- "for-each": "^0.3.3",
- "gopd": "^1.0.1",
- "has-proto": "^1.0.3",
- "is-typed-array": "^1.1.13"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/typed-array-length": {
- "version": "1.0.6",
- "resolved": "https://registry.npmjs.org/typed-array-length/-/typed-array-length-1.0.6.tgz",
- "integrity": "sha512-/OxDN6OtAk5KBpGb28T+HZc2M+ADtvRxXrKKbUwtsLgdoxgX13hyy7ek6bFRl5+aBs2yZzB0c4CnQfAtVypW/g==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.7",
- "for-each": "^0.3.3",
- "gopd": "^1.0.1",
- "has-proto": "^1.0.3",
- "is-typed-array": "^1.1.13",
- "possible-typed-array-names": "^1.0.0"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/unbox-primitive": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/unbox-primitive/-/unbox-primitive-1.0.2.tgz",
- "integrity": "sha512-61pPlCD9h51VoreyJ0BReideM3MDKMKnh6+V9L08331ipq6Q8OFXZYiqP6n/tbHx4s5I9uRhcye6BrbkizkBDw==",
- "dev": true,
- "dependencies": {
- "call-bind": "^1.0.2",
- "has-bigints": "^1.0.2",
- "has-symbols": "^1.0.3",
- "which-boxed-primitive": "^1.0.2"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/undici-types": {
- "version": "6.19.8",
- "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz",
- "integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw=="
- },
- "node_modules/uri-js": {
- "version": "4.4.1",
- "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz",
- "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==",
- "dependencies": {
- "punycode": "^2.1.0"
- }
- },
- "node_modules/uuid": {
- "version": "3.4.0",
- "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.4.0.tgz",
- "integrity": "sha512-HjSDRw6gZE5JMggctHBcjVak08+KEVhSIiDzFnT9S9aegmp85S/bReBVTb4QTFaRNptJ9kuYaNhnbNEOkbKb/A==",
- "deprecated": "Please upgrade to version 7 or higher. Older versions may use Math.random() in certain circumstances, which is known to be problematic. See https://v8.dev/blog/math-random for details.",
- "bin": {
- "uuid": "bin/uuid"
- }
- },
- "node_modules/verror": {
- "version": "1.10.0",
- "resolved": "https://registry.npmjs.org/verror/-/verror-1.10.0.tgz",
- "integrity": "sha512-ZZKSmDAEFOijERBLkmYfJ+vmk3w+7hOLYDNkRCuRuMJGEmqYNCNLyBBFwWKVMhfwaEF3WOd0Zlw86U/WC/+nYw==",
- "engines": [
- "node >=0.6.0"
- ],
- "dependencies": {
- "assert-plus": "^1.0.0",
- "core-util-is": "1.0.2",
- "extsprintf": "^1.2.0"
- }
- },
- "node_modules/version-guard": {
- "version": "1.1.3",
- "resolved": "https://registry.npmjs.org/version-guard/-/version-guard-1.1.3.tgz",
- "integrity": "sha512-JwPr6erhX53EWH/HCSzfy1tTFrtPXUe927wdM1jqBBeYp1OM+qPHjWbsvv6pIBduqdgxxS+ScfG7S28pzyr2DQ==",
- "dev": true,
- "engines": {
- "node": ">=0.10.48"
- }
- },
- "node_modules/which": {
- "version": "2.0.2",
- "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
- "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==",
- "dependencies": {
- "isexe": "^2.0.0"
- },
- "bin": {
- "node-which": "bin/node-which"
- },
- "engines": {
- "node": ">= 8"
- }
- },
- "node_modules/which-boxed-primitive": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/which-boxed-primitive/-/which-boxed-primitive-1.0.2.tgz",
- "integrity": "sha512-bwZdv0AKLpplFY2KZRX6TvyuN7ojjr7lwkg6ml0roIy9YeuSr7JS372qlNW18UQYzgYK9ziGcerWqZOmEn9VNg==",
- "dev": true,
- "dependencies": {
- "is-bigint": "^1.0.1",
- "is-boolean-object": "^1.1.0",
- "is-number-object": "^1.0.4",
- "is-string": "^1.0.5",
- "is-symbol": "^1.0.3"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/which-builtin-type": {
- "version": "1.1.4",
- "resolved": "https://registry.npmjs.org/which-builtin-type/-/which-builtin-type-1.1.4.tgz",
- "integrity": "sha512-bppkmBSsHFmIMSl8BO9TbsyzsvGjVoppt8xUiGzwiu/bhDCGxnpOKCxgqj6GuyHE0mINMDecBFPlOm2hzY084w==",
- "dev": true,
- "dependencies": {
- "function.prototype.name": "^1.1.6",
- "has-tostringtag": "^1.0.2",
- "is-async-function": "^2.0.0",
- "is-date-object": "^1.0.5",
- "is-finalizationregistry": "^1.0.2",
- "is-generator-function": "^1.0.10",
- "is-regex": "^1.1.4",
- "is-weakref": "^1.0.2",
- "isarray": "^2.0.5",
- "which-boxed-primitive": "^1.0.2",
- "which-collection": "^1.0.2",
- "which-typed-array": "^1.1.15"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/which-collection": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/which-collection/-/which-collection-1.0.2.tgz",
- "integrity": "sha512-K4jVyjnBdgvc86Y6BkaLZEN933SwYOuBFkdmBu9ZfkcAbdVbpITnDmjvZ/aQjRXQrv5EPkTnD1s39GiiqbngCw==",
- "dev": true,
- "dependencies": {
- "is-map": "^2.0.3",
- "is-set": "^2.0.3",
- "is-weakmap": "^2.0.2",
- "is-weakset": "^2.0.3"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/which-typed-array": {
- "version": "1.1.15",
- "resolved": "https://registry.npmjs.org/which-typed-array/-/which-typed-array-1.1.15.tgz",
- "integrity": "sha512-oV0jmFtUky6CXfkqehVvBP/LSWJ2sy4vWMioiENyJLePrBO/yKyV9OyJySfAKosh+RYkIl5zJCNZ8/4JncrpdA==",
- "dev": true,
- "dependencies": {
- "available-typed-arrays": "^1.0.7",
- "call-bind": "^1.0.7",
- "for-each": "^0.3.3",
- "gopd": "^1.0.1",
- "has-tostringtag": "^1.0.2"
- },
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
- "node_modules/word-wrap": {
- "version": "1.2.5",
- "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz",
- "integrity": "sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==",
- "dev": true,
- "engines": {
- "node": ">=0.10.0"
- }
- },
- "node_modules/wrap-ansi": {
- "version": "8.1.0",
- "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz",
- "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==",
- "dependencies": {
- "ansi-styles": "^6.1.0",
- "string-width": "^5.0.1",
- "strip-ansi": "^7.0.1"
- },
- "engines": {
- "node": ">=12"
- },
- "funding": {
- "url": "https://github.com/chalk/wrap-ansi?sponsor=1"
- }
- },
- "node_modules/wrap-ansi-cjs": {
- "name": "wrap-ansi",
- "version": "7.0.0",
- "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
- "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
- "dependencies": {
- "ansi-styles": "^4.0.0",
- "string-width": "^4.1.0",
- "strip-ansi": "^6.0.0"
- },
- "engines": {
- "node": ">=10"
- },
- "funding": {
- "url": "https://github.com/chalk/wrap-ansi?sponsor=1"
- }
- },
- "node_modules/wrap-ansi-cjs/node_modules/ansi-regex": {
- "version": "5.0.1",
- "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
- "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/wrap-ansi-cjs/node_modules/ansi-styles": {
- "version": "4.3.0",
- "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
- "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
- "dependencies": {
- "color-convert": "^2.0.1"
- },
- "engines": {
- "node": ">=8"
- },
- "funding": {
- "url": "https://github.com/chalk/ansi-styles?sponsor=1"
- }
- },
- "node_modules/wrap-ansi-cjs/node_modules/emoji-regex": {
- "version": "8.0.0",
- "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
- "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="
- },
- "node_modules/wrap-ansi-cjs/node_modules/string-width": {
- "version": "4.2.3",
- "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
- "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
- "dependencies": {
- "emoji-regex": "^8.0.0",
- "is-fullwidth-code-point": "^3.0.0",
- "strip-ansi": "^6.0.1"
- },
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/wrap-ansi-cjs/node_modules/strip-ansi": {
- "version": "6.0.1",
- "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
- "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
- "dependencies": {
- "ansi-regex": "^5.0.1"
- },
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/wrappy": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
- "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
- "dev": true
- },
- "node_modules/ws": {
- "version": "8.18.0",
- "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.0.tgz",
- "integrity": "sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==",
- "engines": {
- "node": ">=10.0.0"
- },
- "peerDependencies": {
- "bufferutil": "^4.0.1",
- "utf-8-validate": ">=5.0.2"
- },
- "peerDependenciesMeta": {
- "bufferutil": {
- "optional": true
- },
- "utf-8-validate": {
- "optional": true
- }
- }
- },
- "node_modules/xdg-basedir": {
- "version": "4.0.0",
- "resolved": "https://registry.npmjs.org/xdg-basedir/-/xdg-basedir-4.0.0.tgz",
- "integrity": "sha512-PSNhEJDejZYV7h50BohL09Er9VaIefr2LMAf3OEmpCkjOi34eYyQYAXUTjEQtZJTKcF0E2UKTh+osDLsgNim9Q==",
- "dev": true,
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/yallist": {
- "version": "4.0.0",
- "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz",
- "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==",
- "optional": true
- },
- "node_modules/yocto-queue": {
- "version": "0.1.0",
- "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz",
- "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==",
- "dev": true,
- "engines": {
- "node": ">=10"
- },
- "funding": {
- "url": "https://github.com/sponsors/sindresorhus"
- }
- }
- }
-}
diff --git a/tools/cluster-checker/package.json b/tools/cluster-checker/package.json
deleted file mode 100644
index 231ef88..0000000
--- a/tools/cluster-checker/package.json
+++ /dev/null
@@ -1,9 +0,0 @@
-{
- "dependencies": {
- "@kubernetes/client-node": "^0.21.0",
- "kubernetes-resource-parser": "0.1.0"
- },
- "devDependencies": {
- "standard": "^17.1.2"
- }
-}
diff --git a/tools/gotmpl/.gitignore b/tools/gotmpl/.gitignore
deleted file mode 100644
index b4c6eb4..0000000
--- a/tools/gotmpl/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-gotmpl
\ No newline at end of file
diff --git a/tools/gotmpl/README.md b/tools/gotmpl/README.md
deleted file mode 100644
index 6ac6911..0000000
--- a/tools/gotmpl/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-# Go Template Tool
-
-A simple CLI wrapping Go templates that is used to generate the SETUP
-files for the MLBatch project.
-
diff --git a/tools/gotmpl/go.mod b/tools/gotmpl/go.mod
deleted file mode 100644
index 6d690b9..0000000
--- a/tools/gotmpl/go.mod
+++ /dev/null
@@ -1,5 +0,0 @@
-module github.com/project-codeflare/mlbatch/tools/gotmpl
-
-go 1.22.4
-
-require sigs.k8s.io/yaml v1.4.0 // indirect
diff --git a/tools/gotmpl/go.sum b/tools/gotmpl/go.sum
deleted file mode 100644
index 8c72424..0000000
--- a/tools/gotmpl/go.sum
+++ /dev/null
@@ -1,4 +0,0 @@
-github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
-gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
-sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
diff --git a/tools/gotmpl/gotmpl.go b/tools/gotmpl/gotmpl.go
deleted file mode 100644
index eb2dfd8..0000000
--- a/tools/gotmpl/gotmpl.go
+++ /dev/null
@@ -1,56 +0,0 @@
-package main
-
-import (
- "flag"
- "log"
- "os"
- "text/template"
-
- "sigs.k8s.io/yaml"
-)
-
-func main() {
- var input string
- var output string
- var values string
- flag.StringVar(&input, "input", "", "The input template file")
- flag.StringVar(&output, "output", "", "The output file")
- flag.StringVar(&values, "values", "", "The values.yaml file")
-
- flag.CommandLine.SetOutput(os.Stderr)
- flag.Parse()
-
- if input == "" {
- log.Fatal("Must provide input template filename")
- }
-
- if output == "" {
- log.Fatal("Must provide output filename")
- }
-
- if values == "" {
- log.Fatal("Must provide input values filename")
- }
-
- tmpl, err := template.ParseFiles(input)
- if err != nil {
- log.Fatalf("Parsing input template: %v", err)
- }
-
- valueBytes, err := os.ReadFile(values)
- if err != nil {
- log.Fatalf("Reading values: %v", err)
- }
- var vals map[string]interface{}
- err = yaml.Unmarshal(valueBytes, &vals)
- if err != nil {
- log.Fatalf("Processing values: %v", err)
- }
-
- outfile, err := os.Create(output)
- if err != nil {
- log.Fatalf("Creating output file: %v", err)
- }
-
- if err := tmpl.Execute(outfile, vals); err != nil {
- log.Fatalf("Executing template: %v", err)
- }
-}
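Based on the `-input`, `-values`, and `-output` flags defined in `gotmpl.go` above, a minimal invocation of the tool would have looked roughly like the sketch below (run from the repository root; the template and values file names are illustrative, not taken from the repository):

```sh
# Render a Go template into a SETUP file using values from a YAML file
# (hypothetical file names; substitute the actual template and values files).
go run ./tools/gotmpl \
  -input setup.md.tmpl \
  -values setup-values.yaml \
  -output SETUP.md
```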
diff --git a/tools/pytorchjob-generator/README.md b/tools/pytorchjob-generator/README.md
deleted file mode 100644
index 84ac186..0000000
--- a/tools/pytorchjob-generator/README.md
+++ /dev/null
@@ -1,72 +0,0 @@
-# PyTorchJob Generator
-
-The Helm chart defined in this folder facilitates the configuration of PyTorch
-jobs for submission to an OpenShift cluster implementing MLBatch.
-
-Invocations of this chart generate a `PyTorchJob` wrapped into an `AppWrapper`
-for better traceability and fault-tolerance.
-
-## Obtaining the Chart
-
-To start with, add the `mlbatch` Helm chart repository.
-```sh
-helm repo add mlbatch https://project-codeflare.github.io/mlbatch
-helm repo update
-```
-To verify the repository was added correctly, search for `AppWrapper`.
-```sh
-helm search repo AppWrapper
-```
-You should see output similar to the following:
-```sh
-NAME CHART VERSION APP VERSION DESCRIPTION
-mlbatch/pytorchjob-generator 1.1.9 v1beta2 An AppWrapper generator for PyTorchJobs
-```
-
-## Configuring the Job
-
-Create a `settings.yaml` file with the settings for the PyTorch job, for
-example:
-```yaml
-jobName: my-job # name of the generated AppWrapper and PyTorchJob objects (required)
-queueName: default-queue # local queue to submit to (default: default-queue)
-
-numPods: 4 # total pod count including master and worker pods (default: 1)
-numCpusPerPod: 500m # requested number of cpus per pod (default: 1)
-numGpusPerPod: 8 # requested number of gpus per pod (default: 0)
-totalMemoryPerPod: 1Gi # requested amount of memory per pod (default: 1Gi)
-
-priority: default-priority # default-priority (default), low-priority, or high-priority
-
-# container image for the pods (required)
-containerImage: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
-
-# setup commands to run in each pod (optional)
-setupCommands:
-- git clone https://github.com/dbarnett/python-helloworld
-- cd python-helloworld
-
-# main program to invoke via torchrun (optional)
-mainProgram: helloworld.py
-```
-
-To learn more about the available settings see [chart/README.md](chart/README.md).
-
-## Submitting the Job
-
-To submit the PyTorch job to the cluster using the `settings.yaml` file, run:
-```sh
-helm template -f settings.yaml mlbatch/pytorchjob-generator | oc create -f-
-```
-To optionally capture the generated `AppWrapper` specification as a
-`generated.yaml` file, run instead:
-```sh
-helm template -f settings.yaml mlbatch/pytorchjob-generator | tee generated.yaml | oc create -f-
-```
-
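-To check on the status of the submitted job, inspect the generated
-`AppWrapper` object (and, once it has been admitted, the wrapped `PyTorchJob`
-of the same name), for example:
-```sh
-oc get appwrapper my-job
-oc get pytorchjob my-job
-```
-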
-To remove the PyTorch job from the cluster, delete the generated `AppWrapper`
-object:
-```sh
-oc delete appwrapper my-job
-```
diff --git a/tools/pytorchjob-generator/chart/.helmignore b/tools/pytorchjob-generator/chart/.helmignore
deleted file mode 100644
index 2b29f27..0000000
--- a/tools/pytorchjob-generator/chart/.helmignore
+++ /dev/null
@@ -1 +0,0 @@
-tests
diff --git a/tools/pytorchjob-generator/chart/Chart.yaml b/tools/pytorchjob-generator/chart/Chart.yaml
deleted file mode 100644
index 6d45f81..0000000
--- a/tools/pytorchjob-generator/chart/Chart.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-apiVersion: v2
-name: pytorchjob-generator
-description: An AppWrapper generator for PyTorchJobs
-type: application
-version: 1.1.9
-appVersion: "v1beta2"
diff --git a/tools/pytorchjob-generator/chart/README.md b/tools/pytorchjob-generator/chart/README.md
deleted file mode 100644
index f710179..0000000
--- a/tools/pytorchjob-generator/chart/README.md
+++ /dev/null
@@ -1,79 +0,0 @@
-# pytorchjob-generator
-
-An AppWrapper generator for PyTorchJobs
-
-  
-
-## Overview
-
-This file documents the variables that may be set in a user's `settings.yaml` to
-customize the Jobs generated by the tool.
-
-## Values
-
-### Job Metadata
-
-| Key | Type | Default | Description |
-|-----|------|---------|-------------|
-| jobName | string | must be provided by user | Name of the Job. Will be the name of the AppWrapper and the PyTorchJob. |
-| namespace | string | `nil` | Namespace in which to run the Job. If unspecified, the namespace will be inferred using normal Helm/Kubernetes mechanisms when the Job is submitted. |
-| queueName | string | `"default-queue"` | Name of the local queue to which the Job will be submitted. |
-| priority | string | `"default-priority"` | Type of priority for the job (choose from: "default-priority", "low-priority" or "high-priority"). |
-| customLabels | array | `nil` | Optional array of custom labels to add to all the resources created by the Job (the PyTorchJob, the PodGroup, and the AppWrapper). |
-| containerImage | string | must be provided by the user | Image used for creating the Job's containers (needs to have all the applications your job may need) |
-| imagePullSecrets | array | `nil` | List of image-pull-secrets to be used for pulling containerImages |
-| imagePullPolicy | string | `"IfNotPresent"` | Policy for pulling containerImages (choose from: "IfNotPresent", "Always", or "Never") |
-
-### Resource Requirements
-
-| Key | Type | Default | Description |
-|-----|------|---------|-------------|
-| numPods | integer | `1` | Total number of pods (i.e. master + worker pods) to be created |
-| numCpusPerPod | integer or string | `1` | Number of CPUs for each pod. May be a positive integer or a ResourceQuantity (e.g., 500m). |
-| numGpusPerPod | integer | `0` | Number of GPUs for each pod (requesting all GPUs on a node is currently recommended for distributed training). |
-| totalMemoryPerPod | string | `"1Gi"` | Total memory for each pod expressed as a ResourceQuantity (e.g., 1Gi, 200M). |
-| limitCpusPerPod | integer or string | numCpusPerPod | Limit on the number of CPUs per pod for elastic jobs. May be a positive integer or a ResourceQuantity (e.g., 500m). |
-| limitGpusPerPod | integer | numGpusPerPod | Limit on the number of GPUs per pod for elastic jobs. |
-| limitMemoryPerPod | string | totalMemoryPerPod | Limit on the total memory per pod for elastic jobs (e.g., 1Gi, 200M). |
-
-### Workload Specification
-
-| Key | Type | Default | Description |
-|-----|------|---------|-------------|
-| environmentVariables | array | `nil` | List of variables/values to be defined for all the ranks. Values can be literals or references to Kubernetes secrets or configmaps. See [values.yaml](values.yaml) for examples of the supported syntaxes, as well as the example at the end of this document. NOTE: The following standard [PyTorch Distributed environment variables](https://pytorch.org/docs/stable/distributed.html#environment-variable-initialization) are set automatically and can be referenced in the commands without being set manually: WORLD_SIZE, RANK, MASTER_ADDR, MASTER_PORT. |
-| sshGitCloneConfig | object | `nil` | Private GitHub clone support. See [values.yaml](values.yaml) for additional instructions. |
-| setupCommands | array | no custom commands are executed | List of custom commands to be run at the beginning of the execution. Use `setupCommands` to clone code, download data, and change directories. |
-| mainProgram | string | `nil` | Name of the PyTorch program to be executed by `torchrun`. Provide your program name here and NOT in "setupCommands", as this Helm chart supplies the necessary `torchrun` arguments for parallel execution. WARNING: the program path is resolved relative to the working directory set by change-of-directory commands in "setupCommands". If no value is provided, only `setupCommands` are executed and `torchrun` is not invoked. |
-| volumes | array | No volumes are mounted | List of (name, claimName, mountPath) triples describing persistentVolumeClaim-backed volumes to be mounted into the Job's pods. |
-
-### Advanced Options
-
-| Key | Type | Default | Description |
-|-----|------|---------|-------------|
-| roceGdrResName | string | nvidia.com/roce_gdr | RoCE GDR resource name (can vary by cluster configuration) |
-| numRoceGdr | integer | `0` | Number of nvidia.com/roce_gdr resources (0 means disabled; >0 enables GDR over RoCE). Must be 0 unless numPods > 1. |
-| topologyFileConfigMap | string | `nil` | Name of the configmap containing /var/run/nvidia-topologyd/virtualTopology.xml for the system, e.g. nvidia-topo-gdr. |
-| ncclGdrEnvConfigMap | string | `nil` | Name of the configmap containing NCCL networking environment variables for the system, e.g. nccl-netwk-env-vars. |
-| multiNicNetworkName | string | `nil` | Name of multi-NIC network, if one is available. Note: when GDR over RoCE is used/available, the RoCE multi-nic network instance should be specified here instead of the TCP multi-nic network instance. Existing instance names can be listed with `oc get multinicnetwork`. |
-| disableSharedMemory | boolean | `false` | Control whether or not a shared memory volume is added to the PyTorchJob. |
-| mountNVMe | object | `nil` | Mount NVMe as a volume. The environment variable NVME_MOUNT_PATH provides the runtime mount path. |
-| initContainers | array | `nil` | List of "(name, image, command[])" entries specifying init containers to be run before the main job. The 'command' field is a list of commands to run in the container; see the Kubernetes documentation on initContainers for reference. |
-| autopilotHealthChecks | array | No pre-flight checks are enabled. | Autopilot health checks. List of labels enabling one or more system health pre-flight checks. |
-| hostIgnoreList | array | `nil` | List of host names on which the Job must not be scheduled (to avoid faulty nodes). |
-| schedulerName | string | `nil` | If non-nil, use the specified Kubernetes scheduler. ***Setting this to the default-scheduler may result in GPU fragmentation on the cluster. Setting this to any non-nil value should only be done when explicitly directed to do so by a cluster admin!*** |
-| serviceAccountName | string | the default service account for the namespace will be used. | Service account to be used for running the Job |
-
-### Fault Tolerance
-
-| Key | Type | Default | Description |
-|-----|------|---------|-------------|
-| admissionGracePeriodDuration | string | The AppWrapper defaults will be used | Customize the admissionGracePeriod; see https://project-codeflare.github.io/appwrapper/arch-fault-tolerance/ |
-| warmupGracePeriodDuration | string | The AppWrapper defaults will be used | Customize the warmupGracePeriod; see https://project-codeflare.github.io/appwrapper/arch-fault-tolerance/ |
-| failureGracePeriodDuration | string | The AppWrapper defaults will be used | Customize the failureGracePeriod; see https://project-codeflare.github.io/appwrapper/arch-fault-tolerance/ |
-| retryPausePeriodDuration | string | The AppWrapper defaults will be used | Customize the retryPausePeriod; see https://project-codeflare.github.io/appwrapper/arch-fault-tolerance/ |
-| retryLimit | integer | The AppWrapper defaults will be used | Customize the retryLimit; see https://project-codeflare.github.io/appwrapper/arch-fault-tolerance/ |
-| forcefulDeletionGracePeriodDuration | string | The AppWrapper defaults will be used | Customize the forcefulDeletionGracePeriod; see https://project-codeflare.github.io/appwrapper/arch-fault-tolerance/ |
-| deletionOnFailureGracePeriodDuration | string | The AppWrapper defaults will be used | Customize the deletionOnFailureGracePeriod; see https://project-codeflare.github.io/appwrapper/arch-fault-tolerance/ |
-| successTTLDuration | string | The AppWrapper defaults will be used | Customize the successTTL; see https://project-codeflare.github.io/appwrapper/arch-fault-tolerance/ |
-| restartPolicy | string | `"Never"` | Set Kubernetes policy for restarting failed containers "in place" (without restarting the Pod). |
-| terminationGracePeriodSeconds | integer | Kubernetes's default value is used | Set a non-default pod termination grace period (in seconds). |
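-
-The snippet below sketches the syntax accepted for `customLabels` and the three
-supported forms of an `environmentVariables` entry (literal value, secret
-reference, and configmap reference). All names and values shown are placeholders.
-```yaml
-customLabels:
-  - key: project-name
-    value: my-project
-environmentVariables:
-  - name: EXAMPLE_VAR          # literal value
-    value: "42"
-  - name: EXAMPLE_SECRET_VAR   # value taken from a Kubernetes secret
-    secret:
-      name: my-secret
-      key: my-secret-key
-  - name: EXAMPLE_CONFIG_VAR   # value taken from a configmap
-    configmap:
-      name: my-configmap
-      key: my-configmap-key
-```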
diff --git a/tools/pytorchjob-generator/chart/README.md.gotmpl b/tools/pytorchjob-generator/chart/README.md.gotmpl
deleted file mode 100644
index dbf620e..0000000
--- a/tools/pytorchjob-generator/chart/README.md.gotmpl
+++ /dev/null
@@ -1,11 +0,0 @@
-{{ template "chart.header" . }}
-{{ template "chart.description" . }}
-
-{{ template "chart.versionBadge" . }}{{ template "chart.typeBadge" . }}{{ template "chart.appVersionBadge" . }}
-
-## Overview
-
-This file documents the variables that may be set in a user's `settings.yaml` to
-customize the Jobs generated by the tool.
-
-{{ template "chart.valuesSection" . }}
diff --git a/tools/pytorchjob-generator/chart/templates/_helpers.tpl b/tools/pytorchjob-generator/chart/templates/_helpers.tpl
deleted file mode 100644
index 68b09ab..0000000
--- a/tools/pytorchjob-generator/chart/templates/_helpers.tpl
+++ /dev/null
@@ -1,309 +0,0 @@
-# This file factors out code snippets that are duplicated in both the Master and Worker templates.
-
-{{- define "mlbatch.customLabels" }}
-{{- if .Values.customLabels }}
-{{- range $customLabel := .Values.customLabels }}
-{{ $customLabel.key }}: {{ $customLabel.value }}
-{{- end }}
-{{- end }}
-{{- end -}}
-
-
-{{- define "mlbatch.container.metadata" }}
-{{- if or .Values.customLabels .Values.autopilotHealthChecks .Values.multiNicNetworkName }}
-metadata:
- {{- if or .Values.customLabels .Values.autopilotHealthChecks }}
- labels:
- {{- include "mlbatch.customLabels" . | indent 8 }}
- {{- if .Values.autopilotHealthChecks }}
- autopilot: ""
- {{- range $healthcheck := .Values.autopilotHealthChecks }}
- {{ $healthcheck }}: ""
- {{- end }}
- {{- end }}
- {{- end }}
- {{- if .Values.multiNicNetworkName }}
- annotations:
- k8s.v1.cni.cncf.io/networks: {{ .Values.multiNicNetworkName }}
- {{- end }}
-{{- end }}
-{{- end -}}
-
-
-{{- define "mlbatch.schedulingSpec" }}
-{{- if ne .Values.terminationGracePeriodSeconds nil }}
-terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }}
-{{- end }}
-{{- if .Values.schedulerName }}
-schedulerName: {{ .Values.schedulerName }}
-{{- end }}
-priorityClassName: {{ .Values.priority }}
-affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
-{{- if .Values.hostIgnoreList }}
- - key: kubernetes.io/hostname
- operator: NotIn
- values:
- {{- range $host := .Values.hostIgnoreList }}
- - {{ $host }}
- {{- end }}
-{{- end }}
-{{- end -}}
-
-
-{{- define "mlbatch.resources" }}
-resources:
- requests:
- cpu: {{ .Values.numCpusPerPod }}
- nvidia.com/gpu: {{ .Values.numGpusPerPod }}
- memory: {{ .Values.totalMemoryPerPod }}
- {{ .Values.roceGdrResName | default "nvidia.com/roce_gdr" }}: {{ .Values.numRoceGdr | default 0 }}
- limits:
- cpu: {{ .Values.limitCpusPerPod | default .Values.numCpusPerPod }}
- nvidia.com/gpu: {{ .Values.limitGpusPerPod | default .Values.numGpusPerPod }}
- memory: {{ .Values.limitMemoryPerPod | default .Values.totalMemoryPerPod }}
- {{ .Values.roceGdrResName | default "nvidia.com/roce_gdr" }}: {{ .Values.numRoceGdr | default 0 }}
-{{- end -}}
-
-
-{{- define "mlbatch.env" }}
-{{- if .Values.ncclGdrEnvConfigMap }}
-envFrom:
- - configMapRef:
- name: {{ .Values.ncclGdrEnvConfigMap }}
-{{- end }}
-{{- if or .Values.environmentVariables .Values.sshGitCloneConfig .Values.mountNVMe .Values.topologyFileConfigMap ( eq .Values.schedulerName "sakkara" ) }}
-env:
- {{- if eq .Values.schedulerName "sakkara" }}
- - name: SAKKARA_RANK
- valueFrom:
- fieldRef:
- fieldPath: metadata.labels['sakkara.member.rank']
- {{- end }}
- {{- if .Values.topologyFileConfigMap }}
- - name: NCCL_TOPO_FILE
- value: /var/run/nvidia-topologyd/virtualTopology.xml
- {{- end }}
- {{- if .Values.mountNVMe }}
- - name: NVME_MOUNT_PATH
- {{- if .Values.mountNVMe.mountPath }}
- value: {{ .Values.mountNVMe.mountPath | quote }}
- {{- else }}
- value: "/workspace/scratch-nvme"
- {{- end }}
- {{- end }}
- {{- range $variable := .Values.environmentVariables }}
- - name: {{ required "Missing 'name' in 'environmentVariables' list element" $variable.name }}
- {{- if $variable.value }}
- value: {{ $variable.value | quote }}
- {{- else if $variable.secret }}
- valueFrom:
- secretKeyRef:
- name: {{ required "Missing 'name' in 'environmentVariables.secret' list element" $variable.secret.name }}
- key: {{ required "Missing 'key' in 'environmentVariables.secret' list element" $variable.secret.key | quote }}
- {{- else if $variable.configmap }}
- valueFrom:
- configMapKeyRef:
- name: {{ required "Missing 'name' in 'environmentVariables.configmap' list element" $variable.configmap.name }}
- key: {{ required "Missing 'key' in 'environmentVariables.configmap' list element" $variable.configmap.key | quote }}
- {{- else if ( kindIs "float64" $variable.value ) }}
- value: "0"
- {{- else }}
- value: {{ required "Missing 'value' in 'environmentVariables' list element" "" }}
- {{- end }}
- {{- end }}
- {{- if .Values.sshGitCloneConfig }}
- - name: GIT_SSH_COMMAND
- {{- if .Values.sshGitCloneConfig.sshCmd }}
- value: {{ .Values.sshGitCloneConfig.sshCmd | quote }}
- {{- else if .Values.sshGitCloneConfig.secretMountPath }}
- {{- if .Values.sshGitCloneConfig.configMapMountPath }}
- value: "ssh -i {{ .Values.sshGitCloneConfig.secretMountPath }}/id_rsa -o UserKnownHostsFile={{ .Values.sshGitCloneConfig.configMapMountPath }}/known_hosts -vv"
- {{- else }}
- value: "ssh -i {{ .Values.sshGitCloneConfig.secretMountPath }}/id_rsa -o UserKnownHostsFile=/tmp/.ssh/hosts/known_hosts -vv"
- {{- end }}
- {{- else if .Values.sshGitCloneConfig.configMapMountPath }}
- value: "ssh -i /tmp/.ssh/keys/id_rsa -o UserKnownHostsFile={{ .Values.sshGitCloneConfig.configMapMountPath }}/known_hosts -vv"
- {{- else }}
- value: "ssh -i /tmp/.ssh/keys/id_rsa -o UserKnownHostsFile=/tmp/.ssh/hosts/known_hosts -vv"
- {{- end }}
- {{- end }}
-{{- else }}
-env: []
-{{- end }}
-{{- end -}}
-
-
-{{- define "mlbatch.command" }}
-command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- {{- if eq .Values.schedulerName "sakkara" }}
- echo "Sakkara is enabled: using Sakkara-assigned rank instead of the default PyTorchJob rank"
- export RANK=$SAKKARA_RANK
- {{- end }}
- {{- range $command := .Values.setupCommands }}
- {{ $command }}
- {{- end }}
- {{- if .Values.mainProgram }}
- {{- if gt ( int .Values.numGpusPerPod ) 0 }}
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node={{ .Values.numGpusPerPod }} --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" {{ .Values.mainProgram }}
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node={{ .Values.numGpusPerPod }} --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" {{ .Values.mainProgram }}
- {{- else }}
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" {{ .Values.mainProgram }}
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" {{ .Values.mainProgram }}
- {{- end }}
- {{- end }}
-{{- end -}}
-
-
-{{- define "mlbatch.volumeMounts" }}
-{{- if or .Values.volumes .Values.sshGitCloneConfig ( not .Values.disableSharedMemory ) .Values.mountNVMe }}
-volumeMounts:
- {{- if .Values.topologyFileConfigMap }}
- - name: topology-volume
- mountPath: /var/run/nvidia-topologyd
- {{- end }}
- {{- if .Values.mountNVMe }}
- - name: ephemeral-odf-lvm-vg1
- {{- if .Values.mountNVMe.mountPath }}
- mountPath: {{ .Values.mountNVMe.mountPath | quote }}
- {{- else }}
- mountPath: "/workspace/scratch-nvme"
- {{- end }}
- {{- end }}
- {{- range $volume := .Values.volumes }}
- - name: {{ required "Missing 'name' in 'volumes' list element" $volume.name }}
- mountPath: {{ required "Missing 'mountPath' in 'volumes' list element" $volume.mountPath }}
- {{- end }}
- {{- if .Values.sshGitCloneConfig }}
- - name: private-ssh-git-deploy-key
- readOnly: true
- {{- if .Values.sshGitCloneConfig.secretMountPath }}
- mountPath: {{ .Values.sshGitCloneConfig.secretMountPath }}
- {{- else }}
- mountPath: "/tmp/.ssh/keys"
- {{- end }}
- - name: github-known-hosts
- {{- if .Values.sshGitCloneConfig.configMapMountPath }}
- mountPath: {{ .Values.sshGitCloneConfig.configMapMountPath }}
- {{- else }}
- mountPath: "/tmp/.ssh/hosts"
- {{- end }}
- {{- end }}
- {{- if eq .Values.disableSharedMemory false }}
- - name: dshm
- mountPath: "/dev/shm"
- {{- end }}
-{{- else }}
-volumeMounts: []
-{{- end }}
-{{- end -}}
-
-
-{{- define "mlbatch.volumes" }}
-{{- if or .Values.volumes .Values.sshGitCloneConfig ( not .Values.disableSharedMemory ) .Values.mountNVMe }}
-volumes:
- {{- if .Values.topologyFileConfigMap }}
- - name: topology-volume
- configMap:
- name: {{ .Values.topologyFileConfigMap }}
- {{- end }}
- {{- if .Values.mountNVMe }}
- - name: ephemeral-odf-lvm-vg1
- ephemeral:
- volumeClaimTemplate:
- spec:
- storageClassName: odf-lvm-vg1
- volumeMode: Filesystem
- accessModes: [ "ReadWriteOnce" ]
- resources:
- requests:
- storage: {{ .Values.mountNVMe.storage }}
- {{- end }}
- {{- range $volume := .Values.volumes }}
- - name: {{ required "Missing 'name' in 'volumes' list element" $volume.name }}
- persistentVolumeClaim:
- claimName: {{ required "Missing 'claimName' in 'volumes' list element" $volume.claimName }}
- {{- end }}
- {{- if .Values.sshGitCloneConfig }}
- - name: private-ssh-git-deploy-key
- secret:
- secretName: {{ required "Missing 'secretName' in 'sshGitCloneConfig' " .Values.sshGitCloneConfig.secretName }}
- optional: false
- - name: github-known-hosts
- configMap:
- name: {{ required "Missing 'configMapName' in 'sshGitCloneConfig' " .Values.sshGitCloneConfig.configMapName }}
- {{- end }}
-
-{{- if eq .Values.disableSharedMemory false }}
- - name: dshm
- emptyDir:
- medium: Memory
- {{- end }}
-{{- else }}
-volumes: []
-{{- end }}
-{{- end -}}
-
-
-{{- define "mlbatch.initContainers" }}
-{{- if .Values.initContainers }}
-initContainers:
- {{- range $container := .Values.initContainers }}
- - name: {{ required "Missing 'name' of initContainer" $container.name }}
- image: {{ required "Missing 'image' of initContainer" $container.image }}
- {{- if ( required "Missing 'command' array of initContainer" $container.command ) }}
- {{- if kindIs "string" $container.command }}
- command: {{ $container.command }}
- {{- else }}
- command:
- {{- range $command := $container.command }}
- - {{ $command }}
- {{- end }}
- {{- end }}
- {{- end }}
- {{- end }}
-{{- end }}
-{{- end -}}
-
-
-{{- define "mlbatch.imagePullSecrets" }}
-{{- if .Values.imagePullSecrets }}
-imagePullSecrets:
- {{- range $secret := .Values.imagePullSecrets }}
- - name: {{ $secret.name }}
- {{- end }}
-{{- else }}
-imagePullSecrets: []
-{{- end }}
-{{- end -}}
-
-
-{{- define "mlbatch.securityContext" }}
-{{- if or (gt ( int .Values.numRoceGdr ) 0) (eq .Values.serviceAccountName "gdr") }}
-securityContext:
- capabilities:
- add:
- - IPC_LOCK
-{{- end }}
-{{- end -}}
diff --git a/tools/pytorchjob-generator/chart/templates/appwrapper.yaml b/tools/pytorchjob-generator/chart/templates/appwrapper.yaml
deleted file mode 100644
index 7702e3e..0000000
--- a/tools/pytorchjob-generator/chart/templates/appwrapper.yaml
+++ /dev/null
@@ -1,149 +0,0 @@
-{{- if .Values.jobName -}}
-{{- if eq ( regexMatch "^[a-z]([-a-z0-9]*[a-z0-9])?$" .Values.jobName ) false -}}
-{{ required "The 'jobName' provided is NOT correct. Some possible causes are: it begins with a number or a special character (including '-'), has one or more capital letters somewhere in the name, has one or more special characters other than '-', it ends with a special character (including '-')" "" }}
-{{- else -}}
-{{- if gt ( len .Values.jobName ) 50 -}}
-{{ required "Your 'jobName' cannot be longer than 50 characters" "" -}}
-{{- end -}}
-{{- end -}}
-{{- else -}}
-{{ required "Please specify a 'jobName' in the user file" "" -}}
-{{- end -}}
-
-{{- if .Values.mountNVMe -}}
-{{- if (not .Values.mountNVMe.storage) -}}
-{{ required "A 'storage' value is required for mountNVMe" "" }}
-{{- end -}}
-{{- end -}}
-
-{{- if .Values.customLabels -}}
-{{- range $customLabel := .Values.customLabels -}}
-{{- if not $customLabel.key -}}
-{{ required "Missing 'key' in 'customLabels' list element" $customLabel.key }}
-{{- end -}}
-{{- $customLabelKey := split "/" $customLabel.key -}}
-{{- if gt ( len $customLabelKey._0 ) 63 -}}
-{{ required "The name of the 'customLabels.key' must be less than 64 characters" "" }}
-{{- end -}}
-{{- if eq ( regexMatch "^[a-z]([-a-z0-9._]*[a-z0-9])?$" $customLabelKey._0 ) false -}}
-{{ required "The name of the 'customLabels.key' provided is NOT correct. Some possible causes are: it begins with a number or a special character (including '-._'), has one or more capital letters somewhere in the key, has one or more special characters other than '-._', it ends with a special character (including '-._')" "" }}
-{{- end -}}
-{{- if $customLabelKey._1 }}
-{{- if gt ( len $customLabelKey._1 ) 254 -}}
-{{ required "The prefix of the 'customLabels.key' must be less than 254 characters" "" }}
-{{- end -}}
-{{- if eq ( regexMatch "^[a-z]([-a-z0-9.]*[a-z0-9])?$" $customLabelKey._1 ) false -}}
-{{ required "The prefix of the 'customLabels.key' provided is NOT correct. Some possible causes are: it begins with a number or a special character (including '-.'), has one or more capital letters somewhere in the key, has one or more special characters other than '-.', it ends with a special character (including '-.')" "" }}
-{{- end -}}
-{{- end -}}
-{{- if not $customLabel.value -}}
-{{ required "Missing 'value' in 'customLabels' list element" $customLabel.value }}
-{{- end -}}
-{{- if gt ( len $customLabel.value ) 63 -}}
-{{ required "The length of the 'customLabels.value' must be less than 64 characters" "" }}
-{{- end -}}
-{{- if eq ( regexMatch "^[a-z]([-a-z0-9._]*[a-z0-9])?$" $customLabel.value ) false -}}
-{{ required "The 'customLabels.value' provided is NOT correct. Some possible causes are: it begins with a number or a special character (including '-._'), has one or more capital letters somewhere in the name, has one or more special characters other than '-._', it ends with a special character (including '-._')" "" }}
-{{- end -}}
-{{- end -}}
-{{- end -}}
-
-apiVersion: workload.codeflare.dev/v1beta2
-kind: AppWrapper
-metadata:
- name: {{ .Values.jobName }}
- {{- if .Values.namespace }}
- namespace: {{ .Values.namespace }}
- {{- end }}
- annotations:
- workload.codeflare.dev.mlbatch/pytorchGeneratorVersion: "{{ .Chart.Version }}"
- {{- if .Values.admissionGracePeriodDuration }}
- workload.codeflare.dev.appwrapper/admissionGracePeriodDuration: "{{ .Values.admissionGracePeriodDuration }}"
- {{- end }}
- {{- if .Values.warmupGracePeriodDuration }}
- workload.codeflare.dev.appwrapper/warmupGracePeriodDuration: "{{ .Values.warmupGracePeriodDuration }}"
- {{- end }}
- {{- if .Values.failureGracePeriodDuration }}
- workload.codeflare.dev.appwrapper/failureGracePeriodDuration: "{{ .Values.failureGracePeriodDuration }}"
- {{- end }}
- {{- if .Values.retryPausePeriodDuration }}
- workload.codeflare.dev.appwrapper/retryPausePeriodDuration: "{{ .Values.retryPausePeriodDuration }}"
- {{- end }}
- {{- if ne .Values.retryLimit nil }}
- workload.codeflare.dev.appwrapper/retryLimit: "{{ .Values.retryLimit }}"
- {{- end }}
- {{- if .Values.forcefulDeletionGracePeriodDuration }}
- workload.codeflare.dev.appwrapper/forcefulDeletionGracePeriodDuration: "{{ .Values.forcefulDeletionGracePeriodDuration }}"
- {{- end }}
- {{- if .Values.deletionOnFailureGracePeriodDuration }}
- workload.codeflare.dev.appwrapper/deletionOnFailureGracePeriodDuration: "{{ .Values.deletionOnFailureGracePeriodDuration }}"
- {{- end }}
- {{- if .Values.successTTLDuration }}
- workload.codeflare.dev.appwrapper/successTTLDuration: "{{ .Values.successTTLDuration }}"
- {{- end }}
- {{- if or .Values.queueName .Values.customLabels }}
- labels:
- {{- if .Values.queueName }}
- kueue.x-k8s.io/queue-name: {{ .Values.queueName }}
- {{- end }}
- {{- include "mlbatch.customLabels" . | indent 8 }}
- {{- end }}
-spec:
- components:
- - template:
- apiVersion: "kubeflow.org/v1"
- kind: "PyTorchJob"
- metadata:
- name: {{ .Values.jobName }}
- {{- if .Values.customLabels }}
- labels:
- {{- include "mlbatch.customLabels" . | indent 26 }}
- {{- end }}
- spec:
- pytorchReplicaSpecs:
- Master:
- replicas: 1
- restartPolicy: {{ .Values.restartPolicy | default "Never" }}
- template:
- {{- include "mlbatch.container.metadata" . | indent 34 }}
- spec:
- {{- if .Values.serviceAccountName }}
- serviceAccountName: {{ .Values.serviceAccountName }}
- {{- end }}
- {{- include "mlbatch.imagePullSecrets" . | indent 38 }}
- {{- include "mlbatch.initContainers" . | indent 38 }}
- {{- include "mlbatch.schedulingSpec" . | indent 38 }}
- {{- include "mlbatch.volumes" . | indent 38 }}
- containers:
- - name: pytorch
- image: {{ required "Please specify a 'containerImage' in the user file" .Values.containerImage }}
- imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }}
- {{- include "mlbatch.securityContext" . | indent 44 }}
- {{- include "mlbatch.env" . | indent 44 }}
- {{- include "mlbatch.volumeMounts" . | indent 44 }}
- {{- include "mlbatch.resources" . | indent 44 }}
- {{- include "mlbatch.command" . | indent 44 }}
- {{- if gt ( .Values.numPods | int ) 1 }} {{- /*Including a worker spec when only 1 pod (Master) is specified leads to strange behavior */}}
- Worker:
- replicas: {{ sub .Values.numPods 1 }}
- restartPolicy: {{ .Values.restartPolicy | default "Never" }}
- template:
- {{- include "mlbatch.container.metadata" . | indent 34 }}
- spec:
- {{- if .Values.serviceAccountName }}
- serviceAccountName: {{ .Values.serviceAccountName }}
- {{- end }}
- {{- include "mlbatch.imagePullSecrets" . | indent 38 }}
- {{- include "mlbatch.initContainers" . | indent 38 }}
- {{- include "mlbatch.schedulingSpec" . | indent 38 }}
- {{- include "mlbatch.volumes" . | indent 38 }}
- containers:
- - name: pytorch
- image: {{ required "Please specify a 'containerImage' in the user file" .Values.containerImage }}
- imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }}
- {{- include "mlbatch.securityContext" . | indent 44 }}
- {{- include "mlbatch.env" . | indent 44 }}
- {{- include "mlbatch.volumeMounts" . | indent 44 }}
- {{- include "mlbatch.resources" . | indent 44 }}
- {{- include "mlbatch.command" . | indent 44 }}
- {{- end }}
diff --git a/tools/pytorchjob-generator/chart/tests/__snapshot__/helloworld_test.yaml.snap b/tools/pytorchjob-generator/chart/tests/__snapshot__/helloworld_test.yaml.snap
deleted file mode 100644
index 16870fc..0000000
--- a/tools/pytorchjob-generator/chart/tests/__snapshot__/helloworld_test.yaml.snap
+++ /dev/null
@@ -1,1678 +0,0 @@
-Adding Volume Mounts:
- 1: |
- apiVersion: workload.codeflare.dev/v1beta2
- kind: AppWrapper
- metadata:
- annotations:
- workload.codeflare.dev.mlbatch/pytorchGeneratorVersion: 1.1.9
- labels:
- kueue.x-k8s.io/queue-name: default-queue
- name: my-job
- namespace: my-namespace
- spec:
- components:
- - template:
- apiVersion: kubeflow.org/v1
- kind: PyTorchJob
- metadata:
- name: my-job
- spec:
- pytorchReplicaSpecs:
- Master:
- replicas: 1
- restartPolicy: Never
- template:
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env: []
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- volumeMounts:
- - mountPath: /path/to/where/you/want/to/find/your/data
- name: arbitrary-name-0
- - mountPath: /path/to/where/you/want/to/find/your/data-redux
- name: arbitrary-name-1
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- priorityClassName: default-priority
- volumes:
- - name: arbitrary-name-0
- persistentVolumeClaim:
- claimName: name-matching-the-actual-PersistentVolumeClaim
- - name: arbitrary-name-1
- persistentVolumeClaim:
- claimName: name-matching-another-actual-PersistentVolumeClaim
- - emptyDir:
- medium: Memory
- name: dshm
- Worker:
- replicas: 3
- restartPolicy: Never
- template:
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env: []
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- volumeMounts:
- - mountPath: /path/to/where/you/want/to/find/your/data
- name: arbitrary-name-0
- - mountPath: /path/to/where/you/want/to/find/your/data-redux
- name: arbitrary-name-1
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- priorityClassName: default-priority
- volumes:
- - name: arbitrary-name-0
- persistentVolumeClaim:
- claimName: name-matching-the-actual-PersistentVolumeClaim
- - name: arbitrary-name-1
- persistentVolumeClaim:
- claimName: name-matching-another-actual-PersistentVolumeClaim
- - emptyDir:
- medium: Memory
- name: dshm
-Adding initContainers:
- 1: |
- apiVersion: workload.codeflare.dev/v1beta2
- kind: AppWrapper
- metadata:
- annotations:
- workload.codeflare.dev.mlbatch/pytorchGeneratorVersion: 1.1.9
- labels:
- kueue.x-k8s.io/queue-name: default-queue
- name: my-job
- namespace: my-namespace
- spec:
- components:
- - template:
- apiVersion: kubeflow.org/v1
- kind: PyTorchJob
- metadata:
- name: my-job
- spec:
- pytorchReplicaSpecs:
- Master:
- replicas: 1
- restartPolicy: Never
- template:
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env: []
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- volumeMounts:
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- initContainers:
- - command:
- - sh
- - -c
- - whoami && ls -l
- image: busybox
- name: init-container-1
- - command:
- - sh
- - -c
- - echo hello world!
- image: ubuntu
- name: init-container-2
- priorityClassName: default-priority
- volumes:
- - emptyDir:
- medium: Memory
- name: dshm
- Worker:
- replicas: 3
- restartPolicy: Never
- template:
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env: []
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- volumeMounts:
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- initContainers:
- - command:
- - sh
- - -c
- - whoami && ls -l
- image: busybox
- name: init-container-1
- - command:
- - sh
- - -c
- - echo hello world!
- image: ubuntu
- name: init-container-2
- priorityClassName: default-priority
- volumes:
- - emptyDir:
- medium: Memory
- name: dshm
-AppWrapper metadata should match snapshot:
- 1: |
- apiVersion: workload.codeflare.dev/v1beta2
- kind: AppWrapper
- metadata:
- annotations:
- workload.codeflare.dev.mlbatch/pytorchGeneratorVersion: 1.1.9
- labels:
- kueue.x-k8s.io/queue-name: default-queue
- name: my-job
- namespace: my-namespace
- spec:
- components:
- - template:
- apiVersion: kubeflow.org/v1
- kind: PyTorchJob
- metadata:
- name: my-job
- spec:
- pytorchReplicaSpecs:
- Master:
- replicas: 1
- restartPolicy: Never
- template:
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env: []
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- volumeMounts:
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- priorityClassName: default-priority
- volumes:
- - emptyDir:
- medium: Memory
- name: dshm
- Worker:
- replicas: 3
- restartPolicy: Never
- template:
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env: []
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- volumeMounts:
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- priorityClassName: default-priority
- volumes:
- - emptyDir:
- medium: Memory
- name: dshm
-AppWrapper spec should match snapshot:
- 1: |
- apiVersion: workload.codeflare.dev/v1beta2
- kind: AppWrapper
- metadata:
- annotations:
- workload.codeflare.dev.mlbatch/pytorchGeneratorVersion: 1.1.9
- labels:
- kueue.x-k8s.io/queue-name: default-queue
- name: my-job
- namespace: my-namespace
- spec:
- components:
- - template:
- apiVersion: kubeflow.org/v1
- kind: PyTorchJob
- metadata:
- name: my-job
- spec:
- pytorchReplicaSpecs:
- Master:
- replicas: 1
- restartPolicy: Never
- template:
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env: []
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- volumeMounts:
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- priorityClassName: default-priority
- volumes:
- - emptyDir:
- medium: Memory
- name: dshm
- Worker:
- replicas: 3
- restartPolicy: Never
- template:
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env: []
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- volumeMounts:
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- priorityClassName: default-priority
- volumes:
- - emptyDir:
- medium: Memory
- name: dshm
-Enabling NVMe:
- 1: |
- apiVersion: workload.codeflare.dev/v1beta2
- kind: AppWrapper
- metadata:
- annotations:
- workload.codeflare.dev.mlbatch/pytorchGeneratorVersion: 1.1.9
- labels:
- kueue.x-k8s.io/queue-name: default-queue
- name: my-job
- namespace: my-namespace
- spec:
- components:
- - template:
- apiVersion: kubeflow.org/v1
- kind: PyTorchJob
- metadata:
- name: my-job
- spec:
- pytorchReplicaSpecs:
- Master:
- replicas: 1
- restartPolicy: Never
- template:
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env:
- - name: NVME_MOUNT_PATH
- value: /workspace/scratch-nvme
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- volumeMounts:
- - mountPath: /workspace/scratch-nvme
- name: ephemeral-odf-lvm-vg1
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- priorityClassName: default-priority
- volumes:
- - ephemeral:
- volumeClaimTemplate:
- spec:
- accessModes:
- - ReadWriteOnce
- resources:
- requests:
- storage: 800Gi
- storageClassName: odf-lvm-vg1
- volumeMode: Filesystem
- name: ephemeral-odf-lvm-vg1
- - emptyDir:
- medium: Memory
- name: dshm
- Worker:
- replicas: 3
- restartPolicy: Never
- template:
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env:
- - name: NVME_MOUNT_PATH
- value: /workspace/scratch-nvme
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- volumeMounts:
- - mountPath: /workspace/scratch-nvme
- name: ephemeral-odf-lvm-vg1
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- priorityClassName: default-priority
- volumes:
- - ephemeral:
- volumeClaimTemplate:
- spec:
- accessModes:
- - ReadWriteOnce
- resources:
- requests:
- storage: 800Gi
- storageClassName: odf-lvm-vg1
- volumeMode: Filesystem
- name: ephemeral-odf-lvm-vg1
- - emptyDir:
- medium: Memory
- name: dshm
-Enabling RoCE GDR:
- 1: |
- apiVersion: workload.codeflare.dev/v1beta2
- kind: AppWrapper
- metadata:
- annotations:
- workload.codeflare.dev.mlbatch/pytorchGeneratorVersion: 1.1.9
- labels:
- kueue.x-k8s.io/queue-name: default-queue
- name: my-job
- namespace: my-namespace
- spec:
- components:
- - template:
- apiVersion: kubeflow.org/v1
- kind: PyTorchJob
- metadata:
- name: my-job
- spec:
- pytorchReplicaSpecs:
- Master:
- replicas: 1
- restartPolicy: Never
- template:
- metadata:
- annotations:
- k8s.v1.cni.cncf.io/networks: multi-nic-cni-operator-ipvlanl3
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env:
- - name: NCCL_TOPO_FILE
- value: /var/run/nvidia-topologyd/virtualTopology.xml
- envFrom:
- - configMapRef:
- name: nccl-netwk-env-vars
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 2
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 2
- securityContext:
- capabilities:
- add:
- - IPC_LOCK
- volumeMounts:
- - mountPath: /var/run/nvidia-topologyd
- name: topology-volume
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- priorityClassName: default-priority
- volumes:
- - configMap:
- name: nvidia-topo-gdr
- name: topology-volume
- - emptyDir:
- medium: Memory
- name: dshm
- Worker:
- replicas: 3
- restartPolicy: Never
- template:
- metadata:
- annotations:
- k8s.v1.cni.cncf.io/networks: multi-nic-cni-operator-ipvlanl3
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env:
- - name: NCCL_TOPO_FILE
- value: /var/run/nvidia-topologyd/virtualTopology.xml
- envFrom:
- - configMapRef:
- name: nccl-netwk-env-vars
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 2
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 2
- securityContext:
- capabilities:
- add:
- - IPC_LOCK
- volumeMounts:
- - mountPath: /var/run/nvidia-topologyd
- name: topology-volume
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- priorityClassName: default-priority
- volumes:
- - configMap:
- name: nvidia-topo-gdr
- name: topology-volume
- - emptyDir:
- medium: Memory
- name: dshm
-Enabling all advanced features at once:
- 1: |
- apiVersion: workload.codeflare.dev/v1beta2
- kind: AppWrapper
- metadata:
- annotations:
- workload.codeflare.dev.mlbatch/pytorchGeneratorVersion: 1.1.9
- labels:
- kueue.x-k8s.io/queue-name: default-queue
- name: my-job
- namespace: my-namespace
- spec:
- components:
- - template:
- apiVersion: kubeflow.org/v1
- kind: PyTorchJob
- metadata:
- name: my-job
- spec:
- pytorchReplicaSpecs:
- Master:
- replicas: 1
- restartPolicy: Never
- template:
- metadata:
- annotations:
- k8s.v1.cni.cncf.io/networks: multi-nic-cni-operator-ipvlanl3
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env:
- - name: NCCL_TOPO_FILE
- value: /var/run/nvidia-topologyd/virtualTopology.xml
- - name: NVME_MOUNT_PATH
- value: /workspace/scratch-nvme
- - name: GIT_SSH_COMMAND
- value: ssh -i /tmp/.ssh/keys/id_rsa -o UserKnownHostsFile=/tmp/.ssh/hosts/known_hosts -vv
- envFrom:
- - configMapRef:
- name: nccl-netwk-env-vars
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 2
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 2
- securityContext:
- capabilities:
- add:
- - IPC_LOCK
- volumeMounts:
- - mountPath: /var/run/nvidia-topologyd
- name: topology-volume
- - mountPath: /workspace/scratch-nvme
- name: ephemeral-odf-lvm-vg1
- - mountPath: /path/to/where/you/want/to/find/your/data
- name: arbitrary-name-0
- - mountPath: /path/to/where/you/want/to/find/your/data-redux
- name: arbitrary-name-1
- - mountPath: /tmp/.ssh/keys
- name: private-ssh-git-deploy-key
- readOnly: true
- - mountPath: /tmp/.ssh/hosts
- name: github-known-hosts
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- initContainers:
- - command:
- - sh
- - -c
- - whoami && ls -l
- image: busybox
- name: init-container-1
- - command:
- - sh
- - -c
- - echo hello world!
- image: ubuntu
- name: init-container-2
- priorityClassName: default-priority
- volumes:
- - configMap:
- name: nvidia-topo-gdr
- name: topology-volume
- - ephemeral:
- volumeClaimTemplate:
- spec:
- accessModes:
- - ReadWriteOnce
- resources:
- requests:
- storage: 800Gi
- storageClassName: odf-lvm-vg1
- volumeMode: Filesystem
- name: ephemeral-odf-lvm-vg1
- - name: arbitrary-name-0
- persistentVolumeClaim:
- claimName: name-matching-the-actual-PersistentVolumeClaim
- - name: arbitrary-name-1
- persistentVolumeClaim:
- claimName: name-matching-another-actual-PersistentVolumeClaim
- - name: private-ssh-git-deploy-key
- secret:
- optional: false
- secretName: my-git-secret
- - configMap:
- name: my-git-config-map
- name: github-known-hosts
- - emptyDir:
- medium: Memory
- name: dshm
- Worker:
- replicas: 3
- restartPolicy: Never
- template:
- metadata:
- annotations:
- k8s.v1.cni.cncf.io/networks: multi-nic-cni-operator-ipvlanl3
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env:
- - name: NCCL_TOPO_FILE
- value: /var/run/nvidia-topologyd/virtualTopology.xml
- - name: NVME_MOUNT_PATH
- value: /workspace/scratch-nvme
- - name: GIT_SSH_COMMAND
- value: ssh -i /tmp/.ssh/keys/id_rsa -o UserKnownHostsFile=/tmp/.ssh/hosts/known_hosts -vv
- envFrom:
- - configMapRef:
- name: nccl-netwk-env-vars
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 2
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 2
- securityContext:
- capabilities:
- add:
- - IPC_LOCK
- volumeMounts:
- - mountPath: /var/run/nvidia-topologyd
- name: topology-volume
- - mountPath: /workspace/scratch-nvme
- name: ephemeral-odf-lvm-vg1
- - mountPath: /path/to/where/you/want/to/find/your/data
- name: arbitrary-name-0
- - mountPath: /path/to/where/you/want/to/find/your/data-redux
- name: arbitrary-name-1
- - mountPath: /tmp/.ssh/keys
- name: private-ssh-git-deploy-key
- readOnly: true
- - mountPath: /tmp/.ssh/hosts
- name: github-known-hosts
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- initContainers:
- - command:
- - sh
- - -c
- - whoami && ls -l
- image: busybox
- name: init-container-1
- - command:
- - sh
- - -c
- - echo hello world!
- image: ubuntu
- name: init-container-2
- priorityClassName: default-priority
- volumes:
- - configMap:
- name: nvidia-topo-gdr
- name: topology-volume
- - ephemeral:
- volumeClaimTemplate:
- spec:
- accessModes:
- - ReadWriteOnce
- resources:
- requests:
- storage: 800Gi
- storageClassName: odf-lvm-vg1
- volumeMode: Filesystem
- name: ephemeral-odf-lvm-vg1
- - name: arbitrary-name-0
- persistentVolumeClaim:
- claimName: name-matching-the-actual-PersistentVolumeClaim
- - name: arbitrary-name-1
- persistentVolumeClaim:
- claimName: name-matching-another-actual-PersistentVolumeClaim
- - name: private-ssh-git-deploy-key
- secret:
- optional: false
- secretName: my-git-secret
- - configMap:
- name: my-git-config-map
- name: github-known-hosts
- - emptyDir:
- medium: Memory
- name: dshm
-Enabling sshGitConfig injects the envvars, volumes, and volumeMounts:
- 1: |
- apiVersion: workload.codeflare.dev/v1beta2
- kind: AppWrapper
- metadata:
- annotations:
- workload.codeflare.dev.mlbatch/pytorchGeneratorVersion: 1.1.9
- labels:
- kueue.x-k8s.io/queue-name: default-queue
- name: my-job
- namespace: my-namespace
- spec:
- components:
- - template:
- apiVersion: kubeflow.org/v1
- kind: PyTorchJob
- metadata:
- name: my-job
- spec:
- pytorchReplicaSpecs:
- Master:
- replicas: 1
- restartPolicy: Never
- template:
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env:
- - name: GIT_SSH_COMMAND
- value: ssh -i /tmp/.ssh/keys/id_rsa -o UserKnownHostsFile=/tmp/.ssh/hosts/known_hosts -vv
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- volumeMounts:
- - mountPath: /tmp/.ssh/keys
- name: private-ssh-git-deploy-key
- readOnly: true
- - mountPath: /tmp/.ssh/hosts
- name: github-known-hosts
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- priorityClassName: default-priority
- volumes:
- - name: private-ssh-git-deploy-key
- secret:
- optional: false
- secretName: my-git-secret
- - configMap:
- name: my-git-config-map
- name: github-known-hosts
- - emptyDir:
- medium: Memory
- name: dshm
- Worker:
- replicas: 3
- restartPolicy: Never
- template:
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env:
- - name: GIT_SSH_COMMAND
- value: ssh -i /tmp/.ssh/keys/id_rsa -o UserKnownHostsFile=/tmp/.ssh/hosts/known_hosts -vv
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- volumeMounts:
- - mountPath: /tmp/.ssh/keys
- name: private-ssh-git-deploy-key
- readOnly: true
- - mountPath: /tmp/.ssh/hosts
- name: github-known-hosts
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- priorityClassName: default-priority
- volumes:
- - name: private-ssh-git-deploy-key
- secret:
- optional: false
- secretName: my-git-secret
- - configMap:
- name: my-git-config-map
- name: github-known-hosts
- - emptyDir:
- medium: Memory
- name: dshm
-scheduler can be set:
- 1: |
- apiVersion: workload.codeflare.dev/v1beta2
- kind: AppWrapper
- metadata:
- annotations:
- workload.codeflare.dev.mlbatch/pytorchGeneratorVersion: 1.1.9
- labels:
- kueue.x-k8s.io/queue-name: default-queue
- name: my-job
- namespace: my-namespace
- spec:
- components:
- - template:
- apiVersion: kubeflow.org/v1
- kind: PyTorchJob
- metadata:
- name: my-job
- spec:
- pytorchReplicaSpecs:
- Master:
- replicas: 1
- restartPolicy: Never
- template:
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- echo "Sakkara is enabled: using Sakkara-assigned rank instead of the default PyTorchJob rank"
- export RANK=$SAKKARA_RANK
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env:
- - name: SAKKARA_RANK
- valueFrom:
- fieldRef:
- fieldPath: metadata.labels['sakkara.member.rank']
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- volumeMounts:
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- priorityClassName: default-priority
- schedulerName: sakkara
- volumes:
- - emptyDir:
- medium: Memory
- name: dshm
- Worker:
- replicas: 3
- restartPolicy: Never
- template:
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- echo "Sakkara is enabled: using Sakkara-assigned rank instead of the default PyTorchJob rank"
- export RANK=$SAKKARA_RANK
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env:
- - name: SAKKARA_RANK
- valueFrom:
- fieldRef:
- fieldPath: metadata.labels['sakkara.member.rank']
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- volumeMounts:
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- priorityClassName: default-priority
- schedulerName: sakkara
- volumes:
- - emptyDir:
- medium: Memory
- name: dshm
-user-defined environment variables:
- 1: |
- apiVersion: workload.codeflare.dev/v1beta2
- kind: AppWrapper
- metadata:
- annotations:
- workload.codeflare.dev.mlbatch/pytorchGeneratorVersion: 1.1.9
- labels:
- kueue.x-k8s.io/queue-name: default-queue
- name: my-job
- namespace: my-namespace
- spec:
- components:
- - template:
- apiVersion: kubeflow.org/v1
- kind: PyTorchJob
- metadata:
- name: my-job
- spec:
- pytorchReplicaSpecs:
- Master:
- replicas: 1
- restartPolicy: Never
- template:
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env:
- - name: EXAMPLE_VAR1
- value: "6"
- - name: EXAMPLE_VAR2
- value: example2string
- - name: EXAMPLE_VAR3
- valueFrom:
- secretKeyRef:
- key: my-secret-key
- name: my-secret-name
- - name: EXAMPLE_VAR4
- valueFrom:
- configMapKeyRef:
- key: my-configmap-key
- name: my-configmap-name
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- volumeMounts:
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- priorityClassName: default-priority
- volumes:
- - emptyDir:
- medium: Memory
- name: dshm
- Worker:
- replicas: 3
- restartPolicy: Never
- template:
- spec:
- affinity:
- nodeAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- nodeSelectorTerms:
- - matchExpressions:
- - key: autopilot.ibm.com/gpuhealth
- operator: NotIn
- values:
- - ERR
- - TESTING
- - EVICT
- containers:
- - command:
- - sh
- - -c
- - |
- echo "Environment variables set by the kubeflow training operator:"
- echo ${MASTER_ADDR}:${MASTER_PORT}
- echo "PYTHONUNBUFFERED:"${PYTHONUNBUFFERED}
- echo My global rank is ${RANK} / ${WORLD_SIZE}
- echo "Other injected environment variables:"
- echo "NVME_MOUNT_PATH: "${NVME_MOUNT_PATH}
- #
- # User commands
- #
- git clone https://github.com/dbarnett/python-helloworld
- cd python-helloworld
- echo executing: torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- torchrun --nnodes=${WORLD_SIZE} --node_rank=${RANK} --nproc_per_node=8 --rdzv_id=101 --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" helloworld.py
- env:
- - name: EXAMPLE_VAR1
- value: "6"
- - name: EXAMPLE_VAR2
- value: example2string
- - name: EXAMPLE_VAR3
- valueFrom:
- secretKeyRef:
- key: my-secret-key
- name: my-secret-name
- - name: EXAMPLE_VAR4
- valueFrom:
- configMapKeyRef:
- key: my-configmap-key
- name: my-configmap-name
- image: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
- imagePullPolicy: IfNotPresent
- name: pytorch
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- requests:
- cpu: 500m
- memory: 1Gi
- nvidia.com/gpu: 8
- nvidia.com/roce_gdr: 0
- volumeMounts:
- - mountPath: /dev/shm
- name: dshm
- imagePullSecrets: []
- priorityClassName: default-priority
- volumes:
- - emptyDir:
- medium: Memory
- name: dshm
diff --git a/tools/pytorchjob-generator/chart/tests/helloworld.settings.yaml b/tools/pytorchjob-generator/chart/tests/helloworld.settings.yaml
deleted file mode 100644
index 7fafe14..0000000
--- a/tools/pytorchjob-generator/chart/tests/helloworld.settings.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-namespace: my-namespace # namespace to deploy to (required)
-jobName: my-job # name of the generated AppWrapper and PyTorchJob objects (required)
-queueName: default-queue # local queue to submit to (default: default-queue)
-
-numPods: 4 # total pod count including master and worker pods (default: 1)
-numCpusPerPod: 500m # requested number of cpus per pod (default: 1)
-numGpusPerPod: 8 # requested number of gpus per pod (default: 0)
-totalMemoryPerPod: 1Gi # requested amount of memory per pod (default: 1Gi)
-
-priority: default-priority # default-priority (default), low-priority, or high-priority
-
-# container image for the pods (required)
-containerImage: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
-
-# setup commands to run in each pod (optional)
-setupCommands:
-- git clone https://github.com/dbarnett/python-helloworld
-- cd python-helloworld
-
-# main program to invoke via torchrun (optional)
-mainProgram: helloworld.py
diff --git a/tools/pytorchjob-generator/chart/tests/helloworld_test.yaml b/tools/pytorchjob-generator/chart/tests/helloworld_test.yaml
deleted file mode 100644
index 83aa908..0000000
--- a/tools/pytorchjob-generator/chart/tests/helloworld_test.yaml
+++ /dev/null
@@ -1,272 +0,0 @@
-suite: AppWrapper Unit Tests
-templates:
-- templates/appwrapper.yaml
-values:
-- helloworld.settings.yaml
-tests:
-- it: AppWrapper metadata should match snapshot
- asserts:
- - matchSnapshot:
- path: metadata
-
-- it: AppWrapper spec should match snapshot
- asserts:
- - matchSnapshot:
- path: spec
-
-- it: PyTorch worker is elided for single pod Jobs
- set:
- numPods: 1
- asserts:
- - exists:
- path: spec.components[0].template.spec.pytorchReplicaSpecs.Master
- - notExists:
- path: spec.components[0].template.spec.pytorchReplicaSpecs.Worker
-
-- it: Custom labels are injected at all levels
- set:
- customLabels:
- - key: project-name
- value: my-project
- asserts:
- - isSubset:
- path: metadata.labels
- content:
- project-name: my-project
- - isSubset:
- path: spec.components[0].template.metadata.labels
- content:
- project-name: my-project
- - isSubset:
- path: spec.components[0].template.spec.pytorchReplicaSpecs.Master.template.metadata.labels
- content:
- project-name: my-project
- - isSubset:
- path: spec.components[0].template.spec.pytorchReplicaSpecs.Worker.template.metadata.labels
- content:
- project-name: my-project
-
-- it: Invalid job names are rejected
- set:
- jobName: 123Job
- asserts:
- - failedTemplate: {}
-
-- it: Long job names are rejected
- set:
- jobName: this-job-name-is-just-way-too-long-to-be-acceptable-for-our-chart
- asserts:
- - failedTemplate: {}
-
-- it: Disabling shared memory removes volumes and volume mounts
- set:
- disableSharedMemory: true
- asserts:
- - isEmpty:
- path: spec.components[0].template.spec.pytorchReplicaSpecs.Master.template.spec.volumes
- - isEmpty:
- path: spec.components[0].template.spec.pytorchReplicaSpecs.Master.template.spec.containers[0].volumeMounts
- - isEmpty:
- path: spec.components[0].template.spec.pytorchReplicaSpecs.Worker.template.spec.volumes
- - isEmpty:
- path: spec.components[0].template.spec.pytorchReplicaSpecs.Worker.template.spec.containers[0].volumeMounts
-
-- it: queueName can be disabled
- set:
- queueName:
- asserts:
- - notExists:
- path: metadata.labels
-
-- it: namespace can be set
- set:
- namespace: testing-ns
- asserts:
- - equal:
- path: metadata.namespace
- value: testing-ns
-
-- it: scheduler can be set
- set:
- schedulerName: sakkara
- asserts:
- - matchSnapshot:
- path: spec.components[0].template
-
-- it: imagePullPolicy can be set
- set:
- imagePullPolicy: Always
- asserts:
- - equal:
- path: spec.components[0].template.spec.pytorchReplicaSpecs.Master.template.spec.containers[0].imagePullPolicy
- value: Always
- - equal:
- path: spec.components[0].template.spec.pytorchReplicaSpecs.Worker.template.spec.containers[0].imagePullPolicy
- value: Always
-
-- it: Invalid imagePullPolicies are rejected
- set:
- imagePullPolicy: Sometimes
- asserts:
- - failedTemplate: {}
-
-- it: Enabling sshGitConfig injects the envvars, volumes, and volumeMounts
- set:
- sshGitCloneConfig.secretName: my-git-secret
- sshGitCloneConfig.configMapName: my-git-config-map
- asserts:
- - matchSnapshot:
- path: spec.components[0].template
-
-- it: user-defined environment variables
- set:
- environmentVariables:
- - name: EXAMPLE_VAR1
- value: 6
- - name: EXAMPLE_VAR2
- value: "example2string"
- - name: EXAMPLE_VAR3
- secret:
- name: my-secret-name
- key: my-secret-key
- - name: EXAMPLE_VAR4
- configmap:
- name: my-configmap-name
- key: my-configmap-key
- asserts:
- - matchSnapshot:
- path: spec.components[0].template
-
-- it: Enabling RoCE GDR
- set:
- roceGdrResName: nvidia.com/roce_gdr
- numRoceGdr: 2
- topologyFileConfigMap: nvidia-topo-gdr
- ncclGdrEnvConfigMap: nccl-netwk-env-vars
- multiNicNetworkName: multi-nic-cni-operator-ipvlanl3
- asserts:
- - matchSnapshot:
- path: spec.components[0].template
-
-- it: Enabling NVMe
- set:
- mountNVMe.storage: 800Gi
- mountNVMe.mountPath: "/workspace/scratch-nvme"
- asserts:
- - matchSnapshot:
- path: spec.components[0].template
-
-- it: imagePullSecrets
- set:
- imagePullSecrets:
- - name: secret-one
- asserts:
- - equal:
- path: spec.components[0].template.spec.pytorchReplicaSpecs.Master.template.spec.imagePullSecrets[0].name
- value: secret-one
- - equal:
- path: spec.components[0].template.spec.pytorchReplicaSpecs.Worker.template.spec.imagePullSecrets[0].name
- value: secret-one
-
-- it: Adding Volume Mounts
- set:
- volumes:
- - name: arbitrary-name-0
- claimName: name-matching-the-actual-PersistentVolumeClaim
- mountPath: /path/to/where/you/want/to/find/your/data
- - name: arbitrary-name-1
- claimName: name-matching-another-actual-PersistentVolumeClaim
- mountPath: /path/to/where/you/want/to/find/your/data-redux
- asserts:
- - matchSnapshot:
- path: spec.components[0].template
-
-- it: Adding initContainers
- set:
- initContainers:
- - name: init-container-1
- image: busybox
- command: ["sh", "-c", "whoami && ls -l"]
- - name: init-container-2
- image: ubuntu
- command: ["sh", "-c", "echo hello world!"]
- asserts:
- - matchSnapshot:
- path: spec.components[0].template
-
-- it: Setting fault tolerance annotations
- set:
- admissionGracePeriodDuration: "10s"
- warmupGracePeriodDuration: "11s"
- failureGracePeriodDuration: "22s"
- retryPausePeriodDuration: "17s"
- retryLimit: 42
- forcefulDeletionGracePeriodDuration: "19s"
- deletionOnFailureGracePeriodDuration: "2s"
- successTTLDuration: "600s"
- asserts:
- - isSubset:
- path: metadata.annotations
- content:
- workload.codeflare.dev.appwrapper/admissionGracePeriodDuration: "10s"
- workload.codeflare.dev.appwrapper/warmupGracePeriodDuration: "11s"
- workload.codeflare.dev.appwrapper/failureGracePeriodDuration: "22s"
- workload.codeflare.dev.appwrapper/retryPausePeriodDuration: "17s"
- workload.codeflare.dev.appwrapper/retryLimit: "42"
- workload.codeflare.dev.appwrapper/forcefulDeletionGracePeriodDuration: "19s"
- workload.codeflare.dev.appwrapper/deletionOnFailureGracePeriodDuration: "2s"
- workload.codeflare.dev.appwrapper/successTTLDuration: "600s"
-
-- it: Setting integer fault tolerance annotation to 0
- set:
- retryLimit: 0
- terminationGracePeriodSeconds: 0
- asserts:
- - isSubset:
- path: metadata.annotations
- content:
- workload.codeflare.dev.appwrapper/retryLimit: "0"
- - equal:
- path: spec.components[0].template.spec.pytorchReplicaSpecs.Master.template.spec.terminationGracePeriodSeconds
- value: 0
- - equal:
- path: spec.components[0].template.spec.pytorchReplicaSpecs.Worker.template.spec.terminationGracePeriodSeconds
- value: 0
-
-- it: Setting just one tolerance annotation
- set:
- deletionOnFailureGracePeriodDuration: "6h"
- asserts:
- - isSubset:
- path: metadata.annotations
- content:
- workload.codeflare.dev.appwrapper/deletionOnFailureGracePeriodDuration: "6h"
-
-- it: Enabling all advanced features at once
- set:
- sshGitCloneConfig.secretName: my-git-secret
- sshGitCloneConfig.configMapName: my-git-config-map
- roceGdrResName: nvidia.com/roce_gdr
- numRoceGdr: 2
- topologyFileConfigMap: nvidia-topo-gdr
- ncclGdrEnvConfigMap: nccl-netwk-env-vars
- multiNicNetworkName: multi-nic-cni-operator-ipvlanl3
- mountNVMe.storage: 800Gi
- mountNVMe.mountPath: "/workspace/scratch-nvme"
- volumes:
- - name: arbitrary-name-0
- claimName: name-matching-the-actual-PersistentVolumeClaim
- mountPath: /path/to/where/you/want/to/find/your/data
- - name: arbitrary-name-1
- claimName: name-matching-another-actual-PersistentVolumeClaim
- mountPath: /path/to/where/you/want/to/find/your/data-redux
- initContainers:
- - name: init-container-1
- image: busybox
- command: ["sh", "-c", "whoami && ls -l"]
- - name: init-container-2
- image: ubuntu
- command: ["sh", "-c", "echo hello world!"]
- asserts:
- - matchSnapshot:
- path: spec.components[0].template
diff --git a/tools/pytorchjob-generator/chart/values.schema.json b/tools/pytorchjob-generator/chart/values.schema.json
deleted file mode 100644
index 9bebe8f..0000000
--- a/tools/pytorchjob-generator/chart/values.schema.json
+++ /dev/null
@@ -1,200 +0,0 @@
-{
- "$schema": "https://json-schema.org/draft/2020-12/schema#",
- "type": "object",
- "required": [
- "jobName",
- "containerImage"
- ],
- "additionalProperties": false,
- "properties": {
- "jobName": { "type": "string" },
- "namespace": { "oneOf": [
- { "type": "null" },
- { "$ref": "#/$defs/rfc1123Label" }
- ]},
- "queueName": { "oneOf": [
- { "type": "null" },
- { "$ref": "#/$defs/rfc1123Label" }
- ]},
- "priority": { "type": "string", "enum": [ "default-priority", "low-priority", "high-priority" ] },
- "customLabels": { "oneOf": [
- { "type": "null" },
- { "type": "array" }
- ]},
- "containerImage": { "type": "string" },
- "numPods" : { "type": "integer", "minimum": 1 },
- "numCpusPerPod": { "$ref": "#/$defs/resourceCPU" },
- "numGpusPerPod": { "type": "integer", "minimum": 0 },
- "totalMemoryPerPod": { "$ref": "#/$defs/resourceMemory" },
- "limitCpusPerPod": { "oneOf": [
- { "type": "null" },
- { "$ref": "#/$defs/resourceCPU" }
- ]},
- "limitGpusPerPod":{ "oneOf": [
- { "type": "null" },
- { "type": "integer", "minimum": 0 }
- ]},
- "limitMemoryPerPod": { "oneOf": [
- { "type": "null" },
- { "$ref": "#/$defs/resourceMemory" }
- ]},
- "environmentVariables": { "oneOf": [
- { "type": "null" },
- { "type": "array" }
- ]},
- "sshGitCloneConfig": { "oneOf": [
- { "type": "null" },
- {
- "type": "object",
- "properties": {
- "secretName": { "$ref": "#/$defs/rfc1123Label" },
- "configMapName": { "$ref": "#/$defs/rfc1123Label" },
- "secretMountPath": { "type": "string" },
- "configMapMountPath": { "type": "string" },
- "sshCmd": { "type": "string" }
- },
- "required": [ "secretName", "configMapName" ],
- "additionalProperties": false
- }
- ]},
- "setupCommands": { "oneOf": [
- { "type": "null" },
- { "type": "array" }
- ]},
- "mainProgram": { "oneOf": [
- { "type": "null" },
- { "type": "string" }
- ]},
- "imagePullSecrets": { "oneOf": [
- { "type": "null" },
- { "type": "array" }
- ]},
- "imagePullPolicy": { "oneOf": [
- { "type": "null" },
- { "type": "string", "enum": [ "IfNotPresent", "Always", "Never" ] }
- ]},
- "volumes": { "oneOf": [
- { "type": "null" },
- { "type": "array" }
- ]},
- "roceGdrResName": { "oneOf": [
- { "type": "null" },
- { "type": "string" }
- ]},
- "numRoceGdr": { "type": "integer", "minimum": 0 },
- "topologyFileConfigMap": { "oneOf": [
- { "type": "null" },
- { "$ref": "#/$defs/rfc1123Label" }
- ]},
- "ncclGdrEnvConfigMap": { "oneOf": [
- { "type": "null" },
- { "$ref": "#/$defs/rfc1123Label" }
- ]},
- "multiNicNetworkName": { "oneOf": [
- { "type": "null" },
- { "type": "string" }
- ]},
- "disableSharedMemory": { "type": "boolean" },
- "mountNVMe": { "oneOf" : [
- { "type": "null" },
- { "type": "object",
- "properties": {
- "mountPath": { "type": "string" },
- "storage": { "type": "string" }
- },
- "additionalProperties": false
- }
- ]},
- "initContainers": { "oneOf": [
- { "type": "null" },
- { "type": "array" }
- ]},
- "autopilotHealthChecks": { "oneOf": [
- { "type": "null" },
- { "type": "array" }
- ]},
- "restartPolicy" : { "type": "string", "enum": ["Never", "Always", "OnFailure" ] },
- "hostIgnoreList": { "oneOf" : [
- { "type": "null" },
- { "type": "array" }
- ]},
- "schedulerName": { "oneOf": [
- { "type": "null" },
- { "type": "string", "enum": ["sakkara", "scheduler-plugins-scheduler", "default-scheduler" ] }
- ]},
- "serviceAccountName": { "oneOf" : [
- { "type": "null" },
- { "$ref": "#/$defs/rfc1123Label" }
- ]},
- "terminationGracePeriodSeconds": { "oneOf" : [
- { "type": "null" },
- { "type": "integer", "minimum": 0 }
- ]},
- "admissionGracePeriodDuration": { "oneOf" : [
- { "type": "null" },
- { "$ref": "#/$defs/duration" }
- ]},
- "warmupGracePeriodDuration": { "oneOf" : [
- { "type": "null" },
- { "$ref": "#/$defs/duration" }
- ]},
- "failureGracePeriodDuration": { "oneOf" : [
- { "type": "null" },
- { "$ref": "#/$defs/duration" }
- ]},
- "retryPausePeriodDuration": { "oneOf" : [
- { "type": "null" },
- { "$ref": "#/$defs/duration" }
- ]},
- "retryLimit": { "oneOf" : [
- { "type": "null" },
- { "type": "integer", "minimum": 0, "maximum": 100 }
- ]},
- "forcefulDeletionGracePeriodDuration": { "oneOf" : [
- { "type": "null" },
- { "$ref": "#/$defs/duration" }
- ]},
- "deletionOnFailureGracePeriodDuration" : { "oneOf" : [
- { "type": "null" },
- { "$ref": "#/$defs/duration" }
- ]},
- "successTTLDuration" : { "oneOf" : [
- { "type": "null" },
- { "$ref": "#/$defs/duration" }
- ]}
- },
-
- "if": {
- "properties": {
- "numPods": { "const": 1 }
- }
- },
- "then": {
- "properties": {
- "numRoceGdr": { "const": 0 }
- }
- },
-
- "$defs": {
- "rfc1123Label": {
- "type": "string",
- "pattern": "^[a-z]([-a-z0-9]*[a-z0-9])?$",
- "minLength": 1,
- "maxLength": 63
- },
- "resourceCPU": {
- "oneOf": [
- { "type": "integer", "mimimum": 1 },
- { "type": "string", "pattern": "^[0-9]+?(Ki|Mi|Gi|Ti|Pi|Ei|m|k|M|G|T|P|E)$" }
- ]
- },
- "resourceMemory": {
- "type": "string",
- "pattern": "^[0-9]+?(Ki|Mi|Gi|Ti|Pi|Ei|m|k|M|G|T|P|E)$"
- },
- "duration": {
- "type": "string",
- "pattern": "^[0-9]+?(s|m|h|d)$"
- }
- }
-}
diff --git a/tools/pytorchjob-generator/chart/values.yaml b/tools/pytorchjob-generator/chart/values.yaml
deleted file mode 100644
index 0b60656..0000000
--- a/tools/pytorchjob-generator/chart/values.yaml
+++ /dev/null
@@ -1,281 +0,0 @@
-####################
-# Job Metadata
-####################
-
-# -- (string) Name of the Job. Will be the name of the AppWrapper and the PyTorchJob.
-# @default -- must be provided by user
-# @section -- Job Metadata
-jobName:
-
-# -- (string) Namespace in which to run the Job. If unspecified, the namespace will be inferred using normal Helm/Kubernetes mechanisms when the Job is submitted.
-# @section -- Job Metadata
-namespace:
-
-# -- (string) Name of the local queue to which the Job will be submitted.
-# @section -- Job Metadata
-queueName: "default-queue"
-
-# -- (string) Type of priority for the job (choose from: "default-priority", "low-priority" or "high-priority").
-# @section -- Job Metadata
-priority: "default-priority"
-
-# -- (array) Optional array of custom labels to add to all the resources created by the Job (the PyTorchJob, the PodGroup, and the AppWrapper).
-# @section -- Job Metadata
-customLabels:
-# - key: project-name
-# value: my-project
-# - key: organization-name
-# value: my-organization
-
-# -- (string) Image used for creating the Job's containers (needs to have all the applications your job may need)
-# @default -- must be provided by the user
-# @section -- Job Metadata
-containerImage:
-
-# -- (array) List of image-pull-secrets to be used for pulling containerImages
-# @section -- Job Metadata
-imagePullSecrets: #
-# - name: secret-one
-# - name: secret-two
-
-# -- (string) Policy for pulling containerImages (choose from: "IfNotPresent", "Always", or "Never")
-# @section -- Job Metadata
-imagePullPolicy: IfNotPresent
-
-##################################
-# Resource Requirements
-##################################
-
-# -- (integer) Total number of pods (i.e. master + worker pods) to be created
-# @section -- Resource Requirements
-numPods: 1
-
-# -- (integer or string) Number of CPUs for each pod. May be a positive integer or a ResourceQuantity (eg 500m)
-# @section -- Resource Requirements
-numCpusPerPod: 1
-
-# -- (integer) Number of GPUs for each pod (all GPUs per node is currently recommended for distributed training).
-# @section -- Resource Requirements
-numGpusPerPod: 0
-
-# -- (string) Total memory for each pod expressed as a ResourceQuantity (eg 1Gi, 200M, etc.).
-# @section -- Resource Requirements
-totalMemoryPerPod: 1Gi
-
-# -- (integer or string) Limit on the number of CPUs per pod for elastic jobs. May be a positive integer or a ResourceQuantity (eg 500m).
-# @default -- numCpusPerPod
-# @section -- Resource Requirements
-limitCpusPerPod:
-
-# -- (integer) Limit on the number of GPUs per pod for elastic jobs.
-# @default -- numGpusPerPod
-# @section -- Resource Requirements
-limitGpusPerPod: # Limit on the number of GPUs per pod for elastic jobs.
-
-# -- (string) Limit of total memory per pod for elastic jobs (eg 1Gi, 200M, etc.).
-# @default -- totalMemoryPerPod
-# @section -- Resource Requirements
-limitMemoryPerPod: # Limit of total memory per pod for elastic jobs
-
-########################
-# Workload Specification
-########################
-
-# -- (array) List of variables/values to be defined for all the ranks. Values can be literals or
-# references to Kubernetes secrets or configmaps. See [values.yaml](values.yaml) for examples of supported syntaxes.
-#
-# NOTE: The following standard [PyTorch Distributed environment variables](https://pytorch.org/docs/stable/distributed.html#environment-variable-initialization)
-# are set automatically and can be referenced in the commands without being set manually: WORLD_SIZE, RANK, MASTER_ADDR, MASTER_PORT.
-# @section -- Workload Specification
-environmentVariables:
-# - name: EXAMPLE_VAR1
-# value: 6
-# - name: EXAMPLE_VAR2
-# value: "example2string"
-# - name: EXAMPLE_VAR3
-# secret:
-# name: secret-name
-# key: secret-key
-# - name: EXAMPLE_VAR4
-# configmap:
-# name: configmap-name
-# key: configmap-key
-
-# Private GitHub clone support.
-#
-# 0) Create a secret and configMap to enable Private GitHub cloning as documented for your organization.
-# 1) Then fill the name of the secret and configMap below in sshGitCloneConfig
-# 2) Finally, add your (ssh) git clone command to setupCommands in the next section
-#
-
-# -- (object) Private GitHub clone support. See [values.yaml](values.yaml) for additional instructions.
-# @section -- Workload Specification
-sshGitCloneConfig: # Field with "(secretName, configMapName)", optionally "(secretName, configMapName, secretMountPath, configMapMountPath, sshCmd)"
-# secretName: # see steps 1-3 of detailed instructions
-# configMapName: # see step 4 of detailed instructions.
-# secretMountPath: #
-# configMapMountPath: #
-# sshCmd: #
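-#
-# Illustrative example (placeholder names; the Secret and ConfigMap must already
-# exist in the target namespace, as in the chart's unit tests):
-# sshGitCloneConfig:
-# secretName: my-git-secret
-# configMapName: my-git-config-map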
-
-# Commands
-#
-# Any command can be listed here
-#
-# -- (array) List of custom commands to be run at the beginning of the execution. Use `setupCommands` to clone code, download data, and change directories.
-# @default -- no custom commands are executed
-# @section -- Workload Specification
-setupCommands: #
-# - git clone https://github.com/dbarnett/python-helloworld
-# - cd python-helloworld
-
-# Main PyTorch Program
-#
-# Single command to be fed to `torchrun`. Use setupCommands instead
-# if main program should be executed with any entry-point other than `torchrun`
-# e.g. `fairseq`, `colossialai`, `torch.distributed.launch` ...
-#
-# -- (string) Name of the PyTorch program to be executed by `torchrun`. Please provide your program name here and NOT in "setupCommands" as this helm template provides the necessary "torchrun" arguments for the parallel execution. WARNING: this program is relative to the current path set by change-of-directory commands in "setupCommands".
-# If no value is provided, only `setupCommands` are executed and torchrun is elided.
-# @section -- Workload Specification
-mainProgram: #
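-# For example, with the helloworld settings shipped alongside this chart:
-# mainProgram: helloworld.py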
-
-# -- (array) List of "(name, claimName, mountPath)" of volumes, with persistentVolumeClaim, to be mounted to the infrastructure
-# @default -- No volumes are mounted
-# @section -- Workload Specification
-volumes:
-# - name: arbitrary-name-0
-# claimName: name-matching-the-actual-PersistentVolumeClaim
-# mountPath: /path/to/where/you/want/to/find/your/data
-# - name: arbitrary-name-1
-# claimName: name-matching-another-actual-PersistentVolumeClaim
-# mountPath: /path/to/where/you/want/to/find/your/data
-
-# ------------------------------------------------------------------------------------------------
-# Advanced options begin here
-#
-
-# GDR support
-#
-# -- (string) RoCE GDR resource name (can vary by cluster configuration)
-# @default -- nvidia.com/roce_gdr
-# @section -- Advanced Options
-roceGdrResName: #
-
-# -- (integer) number of nvidia.com/roce_grd resources (0 means disabled; >0 means enable GDR over RoCE). Must be 0 unless numPods > 1.
-# @section -- Advanced Options
-numRoceGdr: 0
-
-# -- (string) Name of configmap containing /var/run/nvidia-topologyd/virtualTopology.xml for the system e.g. nvidia-topo-gdr
-# @section -- Advanced Options
-topologyFileConfigMap: # TODO make this required if numRoceGdr > 0 ?
-
-# -- (string) Name of configmap containing NCCL networking environment variables for the system e.g. nccl-netwk-env-vars
-# @section -- Advanced Options
-ncclGdrEnvConfigMap: # TODO make this required if numRoceGdr > 0 ?
-
-# -- (string) Name of multi-NIC network, if one is available.
-# Note: when GDR over RoCE is used/available, the RoCE multi-nic network instance
-# should be specified here instead of the TCP multi-nic network instance.
-# Existing instance names can be listed with `oc get multinicnetwork`.
-#
-# @section -- Advanced Options
-multiNicNetworkName:
-
-# -- (boolean) Control whether or not a shared memory volume is added to the PyTorchJob.
-# @section -- Advanced Options
-disableSharedMemory: false
-
-# -- (object) Mount NVMe as a volume.
-# The environment variable MOUNT_PATH_NVME provides the runtime mount path
-# @section -- Advanced Options
-mountNVMe:
- # storage: 800Gi
- # mountPath: "/workspace/scratch-nvme"
-
-# -- (array) List of "(name, image, command[])" specifying an init containers to be run before the main job. The 'command' field is a list of commands to run in the container, see the Kubernetes entry on initContainers for reference.
-#
-# @section -- Advanced Options
-initContainers:
-# - name: init-container-1
-# image: busybox
-# command: ["sh", "-c", "whoami && ls -l"]
-# - name: init-container-2
-# image: ubuntu
-# command: ["sh", "-c", "echo hello world!"]
-
-# -- (array) Autopilot health checks.
-# List of labels enabling one or more system health pre-flight checks.
-# @default -- No pre-flight checks are enabled.
-# @section -- Advanced Options
-autopilotHealthChecks:
-# - gpu-pcie-bw
-
-# -- (array) List of host names on which the Job must not be scheduled (to avoid faulty nodes).
-# @section -- Advanced Options
-hostIgnoreList:
-# - a100-large-drlfv-worker-3-with-secondary-nw5qh
-# - a100-large-drlfv-worker-3-with-secondary-lb7ch
-
-# -- (string) If non-nil, use the specified Kubernetes scheduler.
-# ***Setting this to the default-scheduler may result in GPU fragmentation on the cluster. Setting this
-# to any non-nil value should only be done when explicitly directed to do so by a cluster admin!***
-# @section -- Advanced Options
-schedulerName:
-
-# -- (string) Service account to be used for running the Job
-# @section -- Advanced Options
-# @default -- the default service account for the namespace will be used.
-serviceAccountName: # service account name
-
-############################
-# Fault Tolerance
-############################
-
-# -- (string) Customize the admissionGracePeriod; see https://project-codeflare.github.io/appwrapper/arch-fault-tolerance/
-# @section -- Fault Tolerance
-# @default -- The AppWrapper defaults will be used
-admissionGracePeriodDuration:
-
-# -- (string) Customize the warmupGracePeriod; see https://project-codeflare.github.io/appwrapper/arch-fault-tolerance/
-# @section -- Fault Tolerance
-# @default -- The AppWrapper defaults will be used
-warmupGracePeriodDuration:
-
-# -- (string) Customize the failureGracePeriod; see https://project-codeflare.github.io/appwrapper/arch-fault-tolerance/
-# @section -- Fault Tolerance
-# @default -- The AppWrapper defaults will be used
-failureGracePeriodDuration:
-
-# -- (string) Customize the retryPausePeriod; see https://project-codeflare.github.io/appwrapper/arch-fault-tolerance/
-# @section -- Fault Tolerance
-# @default -- The AppWrapper defaults will be used
-retryPausePeriodDuration:
-
-# -- (integer) Customize the retryLimit; see https://project-codeflare.github.io/appwrapper/arch-fault-tolerance/
-# @section -- Fault Tolerance
-# @default -- The AppWrapper defaults will be used
-retryLimit:
-
-# -- (string) Customize the forcefulDeletionGracePeriod; see https://project-codeflare.github.io/appwrapper/arch-fault-tolerance/
-# @section -- Fault Tolerance
-# @default -- The AppWrapper defaults will be used
-forcefulDeletionGracePeriodDuration:
-
-# -- (string) Customize the deletionOnFailureGracePeriod; see https://project-codeflare.github.io/appwrapper/arch-fault-tolerance/
-# @section -- Fault Tolerance
-# @default -- The AppWrapper defaults will be used
-deletionOnFailureGracePeriodDuration:
-
-# -- (string) Customize the successTTL; see https://project-codeflare.github.io/appwrapper/arch-fault-tolerance/
-# @section -- Fault Tolerance
-# @default -- The AppWrapper defaults will be used
-successTTLDuration:
-
-# -- (string) Set Kubernetes policy for restarting failed containers "in place" (without restarting the Pod).
-# @section -- Fault Tolerance
-restartPolicy: "Never"
-
-# -- (integer) Set a non-default pod termination grace period (in seconds).
-# @default -- Kubernetes's default value is used
-# @section -- Fault Tolerance
-terminationGracePeriodSeconds:
diff --git a/tools/pytorchjob-generator/examples/helloworld-sakkara.settings.yaml b/tools/pytorchjob-generator/examples/helloworld-sakkara.settings.yaml
deleted file mode 100644
index 67c83cc..0000000
--- a/tools/pytorchjob-generator/examples/helloworld-sakkara.settings.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-jobName: my-job # name of the generated AppWrapper and PyTorchJob objects (required)
-queueName: default-queue # local queue to submit to (default: default-queue)
-
-schedulerName: sakkara
-# If additional constraints are used, specify the configmap here:
-#customLabels:
-# - key: sakkara.group.name
-# value: my-topogrp-0
-
-numPods: 4 # total pod count including master and worker pods (default: 1)
-numCpusPerPod: 500m # requested number of cpus per pod (default: 1)
-numGpusPerPod: 8 # requested number of gpus per pod (default: 0)
-totalMemoryPerPod: 1Gi # requested amount of memory per pod (default: 1Gi)
-
-priority: default-priority # default-priority (default), low-priority, or high-priority
-
-# container image for the pods (required)
-containerImage: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
-
-# setup commands to run in each pod (optional)
-setupCommands:
-- git clone https://github.com/dbarnett/python-helloworld
-- cd python-helloworld
-
-# main program to invoke via torchrun (optional)
-mainProgram: helloworld.py
diff --git a/tools/pytorchjob-generator/examples/helloworld.settings.yaml b/tools/pytorchjob-generator/examples/helloworld.settings.yaml
deleted file mode 100644
index a027d91..0000000
--- a/tools/pytorchjob-generator/examples/helloworld.settings.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-jobName: my-job # name of the generated AppWrapper and PyTorchJob objects (required)
-queueName: default-queue # local queue to submit to (default: default-queue)
-
-numPods: 4 # total pod count including master and worker pods (default: 1)
-numCpusPerPod: 500m # requested number of cpus per pod (default: 1)
-numGpusPerPod: 8 # requested number of gpus per pod (default: 0)
-totalMemoryPerPod: 1Gi # requested amount of memory per pod (default: 1Gi)
-
-priority: default-priority # default-priority (default), low-priority, or high-priority
-
-# container image for the pods (required)
-containerImage: ghcr.io/foundation-model-stack/base:pytorch-latest-nightly-20230126
-
-# setup commands to run in each pod (optional)
-setupCommands:
-- git clone https://github.com/dbarnett/python-helloworld
-- cd python-helloworld
-
-# main program to invoke via torchrun (optional)
-mainProgram: helloworld.py
diff --git a/tools/pytorchjob-generator/release-instructions.md b/tools/pytorchjob-generator/release-instructions.md
deleted file mode 100644
index fb2a0ea..0000000
--- a/tools/pytorchjob-generator/release-instructions.md
+++ /dev/null
@@ -1,20 +0,0 @@
-## Release Instructions
-
-1. Create a release prep branch
-
-2. Update the version number in chart/Chart.yaml
-
-3. Do a `helm unittest -u chart` and then run precommit to
- regenerate the helmdocs. Inspect the diff and make sure
- the only changes are the Chart version
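-
- For example, from the tool's top-level directory (an illustrative sequence,
- assuming the helm-unittest plugin and pre-commit are installed locally):
-
- ```shell
- helm unittest -u chart
- pre-commit run --all-files
- ```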
-
-4. Update the chart version number in the example
- of `helm repo search` in the main README.md
-
-5. Submit & merge a PR with these changes
-
-6. Manually trigger the `Release Charts` workflow in the Actions
- tab of the MLBatch GitHub project. This action will automatically
- generate and push tags for the newly released chart and trigger an
- update of the GH Pages (which contains the helm repo).
-
diff --git a/tools/sakkara-deploy/README.md b/tools/sakkara-deploy/README.md
deleted file mode 100644
index ea531f0..0000000
--- a/tools/sakkara-deploy/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-The helm/chart-releaser-action does not understand git submodules.
-
-Therefore we maintain a copy of https://github.com/atantawi/sakkara-deploy/tree/main/install/ here.
\ No newline at end of file
diff --git a/tools/sakkara-deploy/release-instructions.md b/tools/sakkara-deploy/release-instructions.md
deleted file mode 100644
index fb2a0ea..0000000
--- a/tools/sakkara-deploy/release-instructions.md
+++ /dev/null
@@ -1,20 +0,0 @@
-## Release Instructions
-
-1. Create a release prep branch
-
-2. Update the version number in chart/Chart.yaml
-
-3. Do a `helm unittest -u chart` and then run precommit to
- regenerate the helmdocs. Inspect the diff and make sure
- the only changes are the Chart version
-
-4. Update the chart version number in the example
- of `helm repo search` in the main README.md
-
-5. Submit & merge a PR with these changes
-
-6. Manually trigger the `Release Charts` workflow in the Actions
- tab of the MLBatch GitHub project. This action will automatically
- generate and push tags for the newly released chart and trigger an
- update of the GH Pages (which contains the helm repo).
-
diff --git a/tools/sakkara-deploy/sakkara-scheduler/.helmignore b/tools/sakkara-deploy/sakkara-scheduler/.helmignore
deleted file mode 100644
index 0e8a0eb..0000000
--- a/tools/sakkara-deploy/sakkara-scheduler/.helmignore
+++ /dev/null
@@ -1,23 +0,0 @@
-# Patterns to ignore when building packages.
-# This supports shell glob matching, relative path matching, and
-# negation (prefixed with !). Only one pattern per line.
-.DS_Store
-# Common VCS dirs
-.git/
-.gitignore
-.bzr/
-.bzrignore
-.hg/
-.hgignore
-.svn/
-# Common backup files
-*.swp
-*.bak
-*.tmp
-*.orig
-*~
-# Various IDEs
-.project
-.idea/
-*.tmproj
-.vscode/
diff --git a/tools/sakkara-deploy/sakkara-scheduler/Chart.yaml b/tools/sakkara-deploy/sakkara-scheduler/Chart.yaml
deleted file mode 100644
index 347ee24..0000000
--- a/tools/sakkara-deploy/sakkara-scheduler/Chart.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-apiVersion: v2
-appVersion: v0.29.7
-description: Deploy sakkara group and topology aware scheduler plugin in a cluster
-name: sakkara-scheduler
-type: application
-version: 0.0.1
diff --git a/tools/sakkara-deploy/sakkara-scheduler/README.md b/tools/sakkara-deploy/sakkara-scheduler/README.md
deleted file mode 100644
index caec5b5..0000000
--- a/tools/sakkara-deploy/sakkara-scheduler/README.md
+++ /dev/null
@@ -1,46 +0,0 @@
-# sakkara-scheduler
-
-  
-
-Deploy sakkara group and topology aware scheduler plugin in a cluster
-
-## Values
-
-| Key | Type | Default | Description |
-|-----|------|---------|-------------|
-| fullnameOverride | string | `""` | |
-| image.repository | string | `"quay.io"` | repository to fetch images from |
-| image.tag | string | `"v0.0.1"` | default is the chart appVersion |
-| nameOverride | string | `"sakkara"` | |
-| nodeSelector | object | `{}` | |
-| pluginConfig[0].args.topologyConfigMapNameSpace | string | `"sakkara-scheduler"` | |
-| pluginConfig[0].name | string | `"ClusterTopologyPlacementGroup"` | |
-| plugins.permit.enabled[0].name | string | `"ClusterTopologyPlacementGroup"` | |
-| plugins.postBind.enabled[0].name | string | `"ClusterTopologyPlacementGroup"` | |
-| plugins.postFilter.enabled[0].name | string | `"ClusterTopologyPlacementGroup"` | |
-| plugins.preEnqueue.enabled[0].name | string | `"ClusterTopologyPlacementGroup"` | |
-| plugins.preScore.enabled[0].name | string | `"ClusterTopologyPlacementGroup"` | |
-| plugins.queueSort.disabled[0].name | string | `"*"` | |
-| plugins.queueSort.enabled[0].name | string | `"ClusterTopologyPlacementGroup"` | |
-| plugins.reserve.enabled[0].name | string | `"ClusterTopologyPlacementGroup"` | |
-| plugins.score.disabled[0].name | string | `"*"` | |
-| plugins.score.enabled[0].name | string | `"ClusterTopologyPlacementGroup"` | |
-| plugins.score.enabled[0].weight | int | `10` | |
-| podAnnotations | object | `{}` | |
-| priorityClassName | string | `"system-node-critical"` | |
-| scheduler.affinity | object | `{}` | affinity for deployment's pods |
-| scheduler.enabled | bool | `true` | deploy second scheduler as deployment |
-| scheduler.image | string | `"ibm/sakkara-scheduler"` | path to scheduler image from repository |
-| scheduler.imagePullPolicy | string | `"IfNotPresent"` | |
-| scheduler.leaderElect | bool | `false` | enable for HA mode |
-| scheduler.replicaCount | int | `1` | increase for HA mode |
-| scheduler.resources | object | `{"limits":{"cpu":"500m","memory":"512Mi"},"requests":{"cpu":"200m","memory":"512Mi"}}` | requests/limits for scheduler deployment resources: {} |
-| scheduler.strategy.type | string | `"RollingUpdate"` | Deployment update strategy type |
-| scheduler.verbosity | int | `6` | Log level from 1 to 9 |
-| schedulerConfig.apiVersion | string | `"kubescheduler.config.k8s.io/v1"` | scheduler config apiversion (ref: https://kubernetes.io/docs/reference/scheduling/config/) |
-| securityContext.privileged | bool | `false` | |
-| tolerations | list | `[]` | |
-| useForKubeSchedulerUser | bool | `false` | allow User system:kube-scheduler to work with metrics and CRDs. primary usage is to replace default-scheduler with custom one |
-
-----------------------------------------------
-Autogenerated from chart metadata using [helm-docs v1.14.2](https://github.com/norwoodj/helm-docs/releases/v1.14.2)
diff --git a/tools/sakkara-deploy/sakkara-scheduler/crds/scheduling.x-k8s.io_podgroups.yaml b/tools/sakkara-deploy/sakkara-scheduler/crds/scheduling.x-k8s.io_podgroups.yaml
deleted file mode 100644
index a0790dc..0000000
--- a/tools/sakkara-deploy/sakkara-scheduler/crds/scheduling.x-k8s.io_podgroups.yaml
+++ /dev/null
@@ -1,97 +0,0 @@
----
-apiVersion: apiextensions.k8s.io/v1
-kind: CustomResourceDefinition
-metadata:
- annotations:
- api-approved.kubernetes.io: https://github.com/kubernetes-sigs/scheduler-plugins/pull/50
- controller-gen.kubebuilder.io/version: v0.11.1
- creationTimestamp: null
- name: podgroups.scheduling.x-k8s.io
-spec:
- group: scheduling.x-k8s.io
- names:
- kind: PodGroup
- listKind: PodGroupList
- plural: podgroups
- shortNames:
- - pg
- - pgs
- singular: podgroup
- scope: Namespaced
- versions:
- - name: v1alpha1
- schema:
- openAPIV3Schema:
- description: PodGroup is a collection of Pod; used for batch workload.
- properties:
- apiVersion:
- description: 'APIVersion defines the versioned schema of this representation
- of an object. Servers should convert recognized schemas to the latest
- internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
- type: string
- kind:
- description: 'Kind is a string value representing the REST resource this
- object represents. Servers may infer this from the endpoint the client
- submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
- type: string
- metadata:
- type: object
- spec:
- description: Specification of the desired behavior of the pod group.
- properties:
- minMember:
- description: MinMember defines the minimal number of members/tasks
- to run the pod group; if there's not enough resources to start all
- tasks, the scheduler will not start anyone.
- format: int32
- type: integer
- minResources:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: MinResources defines the minimal resource of members/tasks
- to run the pod group; if there's not enough resources to start all
- tasks, the scheduler will not start anyone.
- type: object
- scheduleTimeoutSeconds:
- description: ScheduleTimeoutSeconds defines the maximal time of members/tasks
- to wait before running the pod group;
- format: int32
- type: integer
- type: object
- status:
- description: Status represents the current information about a pod group.
- This data may not be up to date.
- properties:
- failed:
- description: The number of pods which reached phase Failed.
- format: int32
- type: integer
- occupiedBy:
- description: OccupiedBy marks the workload (e.g., deployment, statefulset)
- UID that occupy the podgroup. It is empty if not initialized.
- type: string
- phase:
- description: Current phase of PodGroup.
- type: string
- running:
- description: The number of actively running pods.
- format: int32
- type: integer
- scheduleStartTime:
- description: ScheduleStartTime of the group
- format: date-time
- type: string
- succeeded:
- description: The number of pods which reached phase Succeeded.
- format: int32
- type: integer
- type: object
- type: object
- served: true
- storage: true
- subresources:
- status: {}
diff --git a/tools/sakkara-deploy/sakkara-scheduler/templates/_helpers.tpl b/tools/sakkara-deploy/sakkara-scheduler/templates/_helpers.tpl
deleted file mode 100644
index 8edc98d..0000000
--- a/tools/sakkara-deploy/sakkara-scheduler/templates/_helpers.tpl
+++ /dev/null
@@ -1,51 +0,0 @@
-{{/*
-Expand the name of the chart.
-*/}}
-{{- define "scheduler-plugins.name" -}}
-{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
-{{- end }}
-
-{{/*
-Create a default fully qualified app name.
-We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
-If release name contains chart name it will be used as a full name.
-*/}}
-{{- define "scheduler-plugins.fullname" -}}
-{{- if .Values.fullnameOverride }}
-{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
-{{- else }}
-{{- $name := default .Chart.Name .Values.nameOverride }}
-{{- if contains $name .Release.Name }}
-{{- .Release.Name | trunc 63 | trimSuffix "-" }}
-{{- else }}
-{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
-{{- end }}
-{{- end }}
-{{- end }}
-
-{{/*
-Create chart name and version as used by the chart label.
-*/}}
-{{- define "scheduler-plugins.chart" -}}
-{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
-{{- end }}
-
-{{/*
-Common labels
-*/}}
-{{- define "scheduler-plugins.labels" -}}
-helm.sh/chart: {{ include "scheduler-plugins.chart" . }}
-{{ include "scheduler-plugins.selectorLabels" . }}
-{{- if .Chart.AppVersion }}
-app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
-{{- end }}
-app.kubernetes.io/managed-by: {{ .Release.Service }}
-{{- end }}
-
-{{/*
-Selector labels
-*/}}
-{{- define "scheduler-plugins.selectorLabels" -}}
-app.kubernetes.io/name: {{ include "scheduler-plugins.name" . }}
-app.kubernetes.io/instance: {{ .Release.Name }}
-{{- end }}
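As a rough worked example of how these helpers resolve, assume a hypothetical release named mlbatch installed with the chart defaults (nameOverride "sakkara", no fullnameOverride): "scheduler-plugins.name" renders as sakkara, and because the release name does not contain it, "scheduler-plugins.fullname" renders as mlbatch-sakkara. The common labels would then come out roughly as follows (plus app.kubernetes.io/version when the chart sets an appVersion):

app.kubernetes.io/name: sakkara
app.kubernetes.io/instance: mlbatch
app.kubernetes.io/managed-by: Helm
helm.sh/chart: sakkara-scheduler-0.1.0   # chart name and version; the version shown is illustrative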
diff --git a/tools/sakkara-deploy/sakkara-scheduler/templates/configmap.yaml b/tools/sakkara-deploy/sakkara-scheduler/templates/configmap.yaml
deleted file mode 100644
index 5adb1a8..0000000
--- a/tools/sakkara-deploy/sakkara-scheduler/templates/configmap.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
- name: {{ include "scheduler-plugins.fullname" . }}
- namespace: {{ .Release.Namespace }}
- labels:
- {{- include "scheduler-plugins.labels" . | nindent 4 }}
-data:
- scheduler-config.yaml: |
- apiVersion: {{ .Values.schedulerConfig.apiVersion }}
- kind: KubeSchedulerConfiguration
- leaderElection:
- leaderElect: {{ .Values.scheduler.leaderElect }}
- resourceName: {{ include "scheduler-plugins.fullname" . }}
- profiles:
- # Compose all plugins in one profile
- - schedulerName: {{ include "scheduler-plugins.fullname" . }}
- plugins:
- {{- toYaml $.Values.plugins | nindent 8 }}
- {{- if $.Values.pluginConfig }}
- pluginConfig: {{ toYaml $.Values.pluginConfig | nindent 6 }}
-{{- end }}
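With the default values shown further below and a resolved fullname of, say, sakkara, the ConfigMap above would render a scheduler configuration roughly like this sketch (the plugin list and plugin args come straight from values.yaml; indentation is approximate):

apiVersion: kubescheduler.config.k8s.io/v1
kind: KubeSchedulerConfiguration
leaderElection:
  leaderElect: false
  resourceName: sakkara
profiles:
  - schedulerName: sakkara
    plugins:
      queueSort:
        enabled:
          - name: ClusterTopologyPlacementGroup
        disabled:
          - name: "*"
      # remaining extension points (preEnqueue, postFilter, preScore, score, reserve, permit, postBind) as configured in values.yaml
    pluginConfig:
      - name: ClusterTopologyPlacementGroup
        args:
          topologyConfigMapNameSpace: sakkara-scheduler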
diff --git a/tools/sakkara-deploy/sakkara-scheduler/templates/deployment.yaml b/tools/sakkara-deploy/sakkara-scheduler/templates/deployment.yaml
deleted file mode 100644
index 9951018..0000000
--- a/tools/sakkara-deploy/sakkara-scheduler/templates/deployment.yaml
+++ /dev/null
@@ -1,66 +0,0 @@
-{{- if .Values.scheduler.enabled }}
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: {{ include "scheduler-plugins.fullname" . }}
- namespace: {{ .Release.Namespace }}
- labels:
- {{- include "scheduler-plugins.labels" . | nindent 4 }}
- component: scheduler
-spec:
- replicas: {{ .Values.scheduler.replicaCount }}
- {{- with .Values.scheduler.strategy }}
- strategy:
- {{- toYaml . | nindent 4 }}
- {{- end }}
- selector:
- matchLabels:
- {{- include "scheduler-plugins.selectorLabels" . | nindent 6 }}
- component: scheduler
- template:
- metadata:
- annotations:
- checksum/configmap: '{{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}'
- {{- with .Values.podAnnotations }}
- {{- toYaml . | nindent 8 }}
- {{- end }}
- labels:
- {{- include "scheduler-plugins.selectorLabels" . | nindent 8 }}
- component: scheduler
- spec:
- priorityClassName: {{ .Values.priorityClassName }}
- serviceAccountName: {{ include "scheduler-plugins.fullname" . }}
- containers:
- - command:
- - /bin/kube-scheduler
- - --config=/etc/kubernetes/scheduler-config.yaml
- - --v={{ .Values.scheduler.verbosity }}
- name: scheduler
- image: "{{ .Values.image.repository }}/{{ .Values.scheduler.image }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
- imagePullPolicy: {{ .Values.scheduler.imagePullPolicy }}
- resources:
- {{- toYaml .Values.scheduler.resources | nindent 12 }}
- securityContext:
- {{- toYaml .Values.securityContext | nindent 12 }}
- volumeMounts:
- - name: scheduler-config
- mountPath: /etc/kubernetes
- readOnly: true
- {{- with .Values.nodeSelector }}
- nodeSelector:
- {{- toYaml . | nindent 8 }}
- {{- end }}
- {{- with .Values.scheduler.affinity }}
- affinity:
- {{- toYaml . | nindent 8 }}
- {{- end }}
- {{- with .Values.tolerations }}
- tolerations:
- {{- toYaml . | nindent 8 }}
- {{- end }}
- volumes:
- - name: scheduler-config
- configMap:
- name: {{ include "scheduler-plugins.fullname" . }}
-{{- end }}
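Workloads opt in to this scheduler by setting spec.schedulerName to the rendered fullname, which must match the schedulerName in the profile above; a minimal sketch, again assuming the resolved name sakkara:

apiVersion: v1
kind: Pod
metadata:
  name: demo                                   # hypothetical workload
spec:
  schedulerName: sakkara                       # route this pod to the sakkara scheduler instead of the default
  containers:
    - name: main
      image: registry.example.com/demo:latest  # placeholder image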
diff --git a/tools/sakkara-deploy/sakkara-scheduler/templates/rbac.yaml b/tools/sakkara-deploy/sakkara-scheduler/templates/rbac.yaml
deleted file mode 100644
index 17b88c6..0000000
--- a/tools/sakkara-deploy/sakkara-scheduler/templates/rbac.yaml
+++ /dev/null
@@ -1,126 +0,0 @@
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
- name: {{ include "scheduler-plugins.fullname" . }}
- labels:
- {{- include "scheduler-plugins.labels" . | nindent 4 }}
-rules:
- - apiGroups: ["", "events.k8s.io"]
- resources: ["events"]
- verbs: ["create", "patch", "update"]
- - apiGroups: ["coordination.k8s.io"]
- resources: ["leases"]
- verbs: ["create"]
- - apiGroups: ["coordination.k8s.io"]
- resourceNames: ["kube-scheduler"]
- resources: ["leases"]
- verbs: ["get", "update"]
- - apiGroups: [""]
- resources: ["endpoints"]
- verbs: ["create"]
- - apiGroups: [""]
- resourceNames: ["kube-scheduler"]
- resources: ["endpoints"]
- verbs: ["get", "update"]
- - apiGroups: [""]
- resources: ["nodes"]
- verbs: ["get", "list", "watch"]
- - apiGroups: [""]
- resources: ["pods"]
- verbs: ["delete", "get", "list", "watch", "patch", "update"]
- - apiGroups: [""]
- resources: ["bindings", "pods/binding"]
- verbs: ["create"]
- - apiGroups: [""]
- resources: ["pods/status"]
- verbs: ["patch", "update"]
- - apiGroups: [""]
- resources: ["replicationcontrollers", "services"]
- verbs: ["get", "list", "watch"]
- - apiGroups: ["apps", "extensions"]
- resources: ["replicasets"]
- verbs: ["get", "list", "watch"]
- - apiGroups: ["apps"]
- resources: ["statefulsets"]
- verbs: ["get", "list", "watch"]
- - apiGroups: ["policy"]
- resources: ["poddisruptionbudgets"]
- verbs: ["get", "list", "watch"]
- - apiGroups: [""]
- resources: ["persistentvolumeclaims", "persistentvolumes"]
- verbs: ["get", "list", "watch", "patch", "update"]
- - apiGroups: ["authentication.k8s.io"]
- resources: ["tokenreviews"]
- verbs: ["create"]
- - apiGroups: ["authorization.k8s.io"]
- resources: ["subjectaccessreviews"]
- verbs: ["create"]
- - apiGroups: ["storage.k8s.io"]
- resources: ["csinodes", "storageclasses"]
- verbs: ["get", "list", "watch"]
- - apiGroups: ["scheduling.x-k8s.io"]
- resources: ["podgroups", "elasticquotas"]
- verbs: ["get", "list", "watch", "create", "delete", "update", "patch"]
- - apiGroups: [""]
- resources: ["events"]
- verbs: ["create", "patch", "update"]
- - apiGroups: [""]
- resources: ["configmaps", "storageclasses"]
- verbs: ["get", "list", "watch", "patch", "update"]
- - apiGroups: [""]
- resources: ["endpoints"]
- verbs: ["create", "get", "list", "watch", "update"]
- - apiGroups: [""]
- resourceNames: ["kube-scheduler"]
- resources: ["endpoints"]
- verbs: ["get", "delete", "update", "patch"]
- - apiGroups: [""]
- resources: ["bindings","pods/binding"]
- verbs: ["create"]
- - apiGroups: [""]
- resources: ["poddisruptionbudgets"]
- verbs: ["get", "list", "watch"]
- - apiGroups: [""]
- resources: ["namespaces"]
- verbs: ["get", "list", "watch"]
- - apiGroups: [""]
- resources: ["csistoragecapacities"]
- verbs: ["get", "list"]
- - apiGroups: ["storage.k8s.io"]
- resources: ["csidrivers", "csistoragecapacities"]
- verbs: ["get", "list", "watch"]
- - apiGroups: ["metrics.k8s.io"]
- resources: ["nodes"]
- verbs: ["get", "list", "watch"]
----
-kind: ClusterRoleBinding
-apiVersion: rbac.authorization.k8s.io/v1
-metadata:
- name: {{ include "scheduler-plugins.fullname" . }}
- labels:
- {{- include "scheduler-plugins.labels" . | nindent 4 }}
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: {{ include "scheduler-plugins.fullname" . }}
-subjects:
-- kind: ServiceAccount
- name: {{ include "scheduler-plugins.fullname" . }}
- namespace: {{ .Release.Namespace }}
-{{- if .Values.useForKubeSchedulerUser }}
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
- labels:
- {{- include "scheduler-plugins.labels" . | nindent 4 }}
- name: scheduler-plugins-kube-scheduler
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: {{ include "scheduler-plugins.fullname" . }}
-subjects:
-- apiGroup: rbac.authorization.k8s.io
- kind: User
- name: system:kube-scheduler
-{{- end }}
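The last ClusterRoleBinding above is optional: it is rendered only when useForKubeSchedulerUser is set, which is intended for clusters that replace the default scheduler and therefore run it as the system:kube-scheduler user. Enabling it is a one-line values override (sketch):

useForKubeSchedulerUser: true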
diff --git a/tools/sakkara-deploy/sakkara-scheduler/templates/serviceaccount.yaml b/tools/sakkara-deploy/sakkara-scheduler/templates/serviceaccount.yaml
deleted file mode 100644
index 8963040..0000000
--- a/tools/sakkara-deploy/sakkara-scheduler/templates/serviceaccount.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
----
-apiVersion: v1
-kind: ServiceAccount
-metadata:
- name: {{ include "scheduler-plugins.fullname" . }}
- namespace: {{ .Release.Namespace }}
- labels:
- {{- include "scheduler-plugins.labels" . | nindent 4 }}
diff --git a/tools/sakkara-deploy/sakkara-scheduler/values.yaml b/tools/sakkara-deploy/sakkara-scheduler/values.yaml
deleted file mode 100644
index f751fe8..0000000
--- a/tools/sakkara-deploy/sakkara-scheduler/values.yaml
+++ /dev/null
@@ -1,92 +0,0 @@
-nameOverride: "sakkara"
-fullnameOverride: ""
-
-image:
- # -- repository to fetch images from
- repository: quay.io
- # -- default is the chart appVersion
- tag: "v0.0.1"
-
-# -- allow the User system:kube-scheduler to work with metrics and CRDs.
-# The primary usage is to replace the default scheduler with a custom one.
-useForKubeSchedulerUser: false
-
-scheduler:
- # -- deploy a second scheduler as a Deployment
- enabled: true
- # -- scheduler image path within the repository
- image: ibm/sakkara-scheduler
- imagePullPolicy: IfNotPresent
- # -- increase for HA mode
- replicaCount: 1
- # -- enable for HA mode
- leaderElect: false
- # -- Log level from 1 to 9
- verbosity: 6
- strategy:
- # -- Deployment update strategy type
- type: RollingUpdate
- # -- requests/limits for scheduler deployment
- # resources: {}
- resources:
- requests:
- cpu: "200m"
- memory: "512Mi"
- limits:
- cpu: "500m"
- memory: "512Mi"
- # -- affinity for deployment's pods
- affinity: {}
-
-priorityClassName: system-node-critical
-
-tolerations: []
-
-nodeSelector: {}
-
-podAnnotations: {}
-
-securityContext:
- privileged: false
-
-# scheduler framework plugins
-plugins:
- preEnqueue:
- enabled:
- - name: ClusterTopologyPlacementGroup
- queueSort:
- enabled:
- - name: ClusterTopologyPlacementGroup
- disabled:
- - name: "*"
- postFilter:
- enabled:
- - name: ClusterTopologyPlacementGroup
- preScore:
- enabled:
- - name: ClusterTopologyPlacementGroup
- score:
- enabled:
- - name: ClusterTopologyPlacementGroup
- weight: 10
- disabled:
- - name: "*"
- reserve:
- enabled:
- - name: ClusterTopologyPlacementGroup
- permit:
- enabled:
- - name: ClusterTopologyPlacementGroup
- postBind:
- enabled:
- - name: ClusterTopologyPlacementGroup
-
-# plugin specific args
-pluginConfig:
-- name: ClusterTopologyPlacementGroup
- args:
- topologyConfigMapNameSpace: sakkara-scheduler
-
-schedulerConfig:
- # -- scheduler config apiversion (ref: https://kubernetes.io/docs/reference/scheduling/config/)
- apiVersion: kubescheduler.config.k8s.io/v1
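As a usage sketch, an installation would typically override only a few of these defaults with a small values file; every value below is illustrative:

# my-values.yaml (hypothetical override file)
image:
  tag: v0.0.2              # pin a specific scheduler image tag instead of the chart appVersion
scheduler:
  replicaCount: 2          # run two replicas for HA...
  leaderElect: true        # ...and enable leader election to match
  verbosity: 4             # lower log verbosity than the default of 6
pluginConfig:
  - name: ClusterTopologyPlacementGroup
    args:
      topologyConfigMapNameSpace: sakkara-scheduler   # namespace holding the topology ConfigMap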