gitatt

Ganjiali · Oct 31, 2024 · 226b7f0 · 226b7f0
1 parent b69783d
commit 226b7f0
Show file tree

Hide file tree

Showing 8 changed files with 22 additions and 5 deletions.
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1 @@
+video.mp4 filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
@@ -4,7 +4,24 @@
 
 ![](./media/thumb.png)
 
-Oasis 500M is the open-source research preview of Oasis, an interactive video diffusion transformer developed by Decart and Etched.
+Oasis is an interactive world model developed by [Decart](https://www.decart.ai/) and [Etched](https://www.etched.com/). Based on diffusion transformers, Oasis takes in user keyboard input and generates gameplay in an autoregressive manner. Here we release the weights for Oasis 500M, a downscaled version of the model, along with inference code for action-conditional frame generation.
 
-See our [blog]() to learn more.
+For more details, see our [joint blog post](https://oasis-model.github.io/).
+
+## Setup
+
+```
+git lfs install
+git clone https://huggingface.co/Etched/oasis-500m
+cd oasis-500m
+pip install -r requirements.txt
+```
+
+## Basic Usage
+We include a basic inference script that loads a prompt frame from a video and generates additional frames conditioned on actions.
+```
+cd oasis-500m
+python inference.py
+```
+The resulting video will be saved to `video.mp4`.
 
diff --git a/inference.py b/inference.py
@@ -26,16 +26,15 @@
 
 # sampling params
 B = 1
-total_frames = 10
+total_frames = 32
 max_noise_level = 1000
 ddim_noise_steps = 100
-stabilization_level = 15
 noise_range = torch.linspace(-1, max_noise_level - 1, ddim_noise_steps + 1)
 noise_abs_max = 20
 ctx_max_noise_idx = ddim_noise_steps // 10 * 3
 
 # get input video 
-video_id = "treechop-f153ac423f61-20210916-183423.chunk_000"
+video_id = "snippy-chartreuse-mastiff-f79998db196d-20220401-224517.chunk_001"
 mp4_path = f"sample_data/{video_id}.mp4"
 actions_path = f"sample_data/{video_id}.actions.pt"
 video = read_video(mp4_path, pts_unit="sec")[0].float() / 255

diff --git a/media/arch.png b/media/arch.png
diff --git a/sample_data/snippy-chartreuse-mastiff-f79998db196d-20220401-224517.chunk_001.actions.pt b/sample_data/snippy-chartreuse-mastiff-f79998db196d-20220401-224517.chunk_001.actions.pt
diff --git a/sample_data/snippy-chartreuse-mastiff-f79998db196d-20220401-224517.chunk_001.mp4 b/sample_data/snippy-chartreuse-mastiff-f79998db196d-20220401-224517.chunk_001.mp4
diff --git a/sample_data/treechop-f153ac423f61-20210916-183423.chunk_000.actions.pt b/sample_data/treechop-f153ac423f61-20210916-183423.chunk_000.actions.pt
diff --git a/sample_data/treechop-f153ac423f61-20210916-183423.chunk_000.mp4 b/sample_data/treechop-f153ac423f61-20210916-183423.chunk_000.mp4