add feature Hunyuan_Video_Model
VikramxD committed Jan 17, 2025
1 parent 085907f commit 1f1201f
Showing 3 changed files with 261 additions and 21 deletions.
1 change: 1 addition & 0 deletions configs/__init__.py
@@ -0,0 +1 @@

74 changes: 74 additions & 0 deletions configs/hunyuan_config.py
@@ -0,0 +1,74 @@
"""
Configuration for Hunyuan Video Generation.
This module contains the configuration settings for the Hunyuan video generation model.
Settings can be configured via environment variables with the VIDEO_GEN_ prefix.
Example:
To use with default settings:
>>> from configs.hunyuan_config import HunyuanConfig
>>> config = HunyuanConfig()
To override via environment variables:
$ export VIDEO_GEN_MODEL_ID="custom/model"
$ export VIDEO_GEN_DEVICE_MAP="cuda"
To override in code:
>>> config = HunyuanConfig(
... model_id="custom/model",
... device_map="cuda"
... )
"""

from pydantic_settings import BaseSettings


class HunyuanConfig(BaseSettings):
"""Configuration settings for Hunyuan Video Generation.
This class uses Pydantic's BaseSettings to manage configuration with environment
variable support. All settings can be overridden via environment variables
with the prefix VIDEO_GEN_.
Attributes:
model_id (str): The Huggingface model ID for the Hunyuan video model.
Default: "hunyuanvideo-community/HunyuanVideo"
device_map (str): Strategy for mapping model layers to devices.
Options: "auto", "balanced", "sequential", or specific device like "cuda:0".
Default: "balanced"
load_in_8bit (bool): Whether to load the model in 8-bit quantization.
Reduces memory usage at the cost of slight quality degradation.
Default: True
torch_dtype (str): PyTorch data type for model weights.
Options: "float16", "float32", "bfloat16".
Default: "float16"
output_dir (str): Directory where generated videos will be saved.
Default: "outputs"
default_fps (int): Default frames per second for generated videos.
Default: 15
Example:
>>> config = HunyuanConfig(
... model_id="custom/model",
... device_map="cuda",
... output_dir="custom_outputs"
... )
>>> print(config.model_id)
'custom/model'
"""

model_id: str = "hunyuanvideo-community/HunyuanVideo"
device_map: str = "balanced"
load_in_8bit: bool = True
torch_dtype: str = "float16"
output_dir: str = "outputs"
default_fps: int = 15

class Config:
"""Pydantic configuration class.
Attributes:
env_prefix: Prefix for environment variables.
Example: VIDEO_GEN_MODEL_ID will set the model_id attribute.
"""
env_prefix = "VIDEO_GEN_"
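A quick sanity check of the precedence rules above (a minimal sketch, assuming pydantic-settings' standard resolution order of init arguments over environment variables over field defaults):

import os

from configs.hunyuan_config import HunyuanConfig

# Environment variables with the VIDEO_GEN_ prefix override the field defaults;
# values are coerced to the declared field types (default_fps becomes an int).
os.environ["VIDEO_GEN_DEVICE_MAP"] = "cuda:0"
os.environ["VIDEO_GEN_DEFAULT_FPS"] = "24"

config = HunyuanConfig()
print(config.device_map, config.default_fps)  # cuda:0 24

# Keyword arguments passed at construction take precedence over the environment.
config = HunyuanConfig(device_map="balanced")
print(config.device_map)  # balanced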
207 changes: 186 additions & 21 deletions scripts/hunyuan_video_inference.py
@@ -3,38 +3,203 @@
This script provides functionality for generating videos using the Hunyuan AI
model. It handles model initialization, video generation configuration, and
output saving.

The script uses Pydantic settings for configuration management; all settings
can be overridden via environment variables with the VIDEO_GEN_ prefix or
programmatically.

Example:
    Basic usage with default settings:

    >>> from scripts.hunyuan_video_inference import HunyuanVideoInference
    >>> generator = HunyuanVideoInference()
    >>> video_path = generator.generate_video(
    ...     prompt="A cat walks on the grass, realistic style."
    ... )
    >>> print(f"Video saved to: {video_path}")

    Usage with custom configuration:

    >>> from configs.hunyuan_config import HunyuanConfig
    >>> config = HunyuanConfig(device_map="cuda", output_dir="custom_outputs")
    >>> generator = HunyuanVideoInference(config)
    >>> video_path = generator.generate_video(
    ...     prompt="A dog playing in the snow",
    ...     num_frames=30,
    ...     fps=30
    ... )

Note:
    This script requires the Hunyuan model to be available either locally or
    downloadable from the Hugging Face Hub. The model requires significant
    GPU memory, especially when not using 8-bit quantization.
"""

import os
from typing import Optional
from datetime import datetime

import torch
from loguru import logger
from diffusers import (
    BitsAndBytesConfig as DiffusersBitsAndBytesConfig,
    HunyuanVideoTransformer3DModel,
    HunyuanVideoPipeline,
)
from diffusers.utils import export_to_video

from configs.hunyuan_config import HunyuanConfig


class HunyuanVideoInference:
"""Hunyuan Video Generation inference class.
This class provides an interface for generating videos using the Hunyuan model.
It handles model initialization, pipeline setup, and video generation with
configurable parameters.
The class uses the HunyuanConfig for configuration management, which can be
provided during initialization or created with default values.
Attributes:
config (HunyuanConfig): Configuration instance for the inference pipeline.
pipeline (HunyuanVideoPipeline): The loaded Hunyuan video generation pipeline.
Example:
>>> generator = HunyuanVideoInference()
>>> video_path = generator.generate_video(
... prompt="A cat walks on the grass",
... num_frames=30,
... fps=30
... )
>>> print(f"Generated video: {video_path}")
"""

def __init__(self, config: Optional[HunyuanConfig] = None):
"""Initialize the Hunyuan Video inference pipeline.
Args:
config (Optional[HunyuanConfig]): Configuration for the inference pipeline.
If None, default configuration will be used.
"""
self.config = config or HunyuanConfig()
self.setup_pipeline()

    def setup_pipeline(self):
        """Set up the Hunyuan Video pipeline with the specified configuration.

        This method initializes the model and creates the pipeline with the
        configuration specified during class initialization. It handles:

        1. Setting up the quantization configuration
        2. Loading the transformer model
        3. Creating the pipeline with the loaded model

        The method sets the pipeline attribute of the class, which is then
        used for video generation.

        Note:
            This method is called automatically during initialization and
            typically does not need to be called directly.
        """
        # Only build a quantization config when 8-bit loading is requested.
        quant_config = (
            DiffusersBitsAndBytesConfig(load_in_8bit=True)
            if self.config.load_in_8bit
            else None
        )

        # Resolve the dtype string from the config (e.g. "float16" -> torch.float16).
        torch_dtype = getattr(torch, self.config.torch_dtype)

        transformer = HunyuanVideoTransformer3DModel.from_pretrained(
            self.config.model_id,
            subfolder="transformer",
            quantization_config=quant_config,
            torch_dtype=torch_dtype,
        )

        self.pipeline = HunyuanVideoPipeline.from_pretrained(
            self.config.model_id,
            transformer=transformer,
            torch_dtype=torch_dtype,
            device_map=self.config.device_map,
        )

def generate_video(
self,
prompt: str,
output_path: Optional[str] = None,
num_frames: int = 61,
num_inference_steps: int = 30,
fps: Optional[int] = None,
) -> str:
"""Generate a video based on the given prompt.
This method handles the entire video generation process, including:
1. Creating the output directory if needed
2. Generating the video frames using the Hunyuan pipeline
3. Exporting the frames to a video file
Args:
prompt (str): Text prompt describing the video to generate.
output_path (Optional[str]): Path to save the video. If None,
a timestamped filename in the configured output directory
will be used.
num_frames (int): Number of frames to generate. More frames
result in longer videos but increase generation time.
Default: 61
num_inference_steps (int): Number of denoising steps. Higher
values may improve quality but increase generation time.
Default: 30
fps (Optional[int]): Frames per second for the output video.
If None, uses the configured default_fps.
Default: None
Returns:
str: Path to the generated video file.
Example:
>>> generator = HunyuanVideoInference()
>>> path = generator.generate_video(
... prompt="A cat walks on the grass",
... num_frames=30,
... fps=30
... )
>>> print(f"Video saved to: {path}")
"""
# Create output directory if it doesn't exist
if output_path is None:
os.makedirs(self.config.output_dir, exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_path = os.path.join(
self.config.output_dir,
f"video_{timestamp}.mp4"
)

# Generate video
video = self.pipeline(
prompt=prompt,
num_frames=num_frames,
num_inference_steps=num_inference_steps
).frames[0]

# Export video
export_to_video(
video,
output_path,
fps=fps or self.config.default_fps
)

logger.info(f"Video generated and saved to: {output_path}")
return output_path

def main():
"""Example usage of the HunyuanVideoInference class.
This function demonstrates how to use the HunyuanVideoInference class
with default settings to generate a simple video.
"""
config = HunyuanConfig()
generator = HunyuanVideoInference(config)

prompt = "A cat walks on the grass, realistic style."
video_path = generator.generate_video(prompt)
print(f"Generated video saved at: {video_path}")


if __name__ == "__main__":
main()
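For reference, a minimal end-to-end sketch of the class-based API added here (the prompt and output directory are illustrative; clip length is roughly num_frames / fps, so the default 61 frames at 15 fps yield about four seconds of video):

from configs.hunyuan_config import HunyuanConfig
from scripts.hunyuan_video_inference import HunyuanVideoInference

# Illustrative settings: write clips to demo_outputs/ at the default 15 fps.
config = HunyuanConfig(output_dir="demo_outputs")
generator = HunyuanVideoInference(config)

# 61 frames at 15 fps is roughly a 4-second clip.
video_path = generator.generate_video(
    prompt="A red panda climbing a tree, cinematic lighting",
    num_frames=61,
    num_inference_steps=30,
)
print(f"Saved to: {video_path}")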
