Sync to latest code update on sd-scripts
bmaltais committed Feb 22, 2023
1 parent 34ab844 commit 2a5fb34
Showing 7 changed files with 408 additions and 202 deletions.
12 changes: 12 additions & 0 deletions README.md
@@ -163,6 +163,18 @@ This will store a backup file with your current locally installed pip packages

## Change History

* 2023/02/22 (v20.8.0):
- Refactor optimizer options. Thanks to mgz-dev!
- Add `--optimizer_type` option to each training script. Please see the help output. Japanese documentation is [here](https://github.com/kohya-ss/sd-scripts/blob/main/train_network_README-ja.md#%E3%82%AA%E3%83%97%E3%83%86%E3%82%A3%E3%83%9E%E3%82%A4%E3%82%B6%E3%81%AE%E6%8C%87%E5%AE%9A%E3%81%AB%E3%81%A4%E3%81%84%E3%81%A6).
- `--use_8bit_adam` and `--use_lion_optimizer` still work and override `--optimizer_type` for backward compatibility (see the sketch after this changelog).
- Add SGDNesterov optimizer and its 8-bit variant.
- Add [D-Adaptation](https://github.com/facebookresearch/dadaptation) optimizer. Thanks to BootsofLagrangian and all!
- Please install the D-Adaptation optimizer with `pip install dadaptation` (it is not in `requirements.txt` currently).
- Please see https://github.com/kohya-ss/sd-scripts/issues/181 for details.
- Add AdaFactor optimizer. Thanks to Toshiaki!
- Extra lr scheduler settings (`num_cycles` etc.) now also work in training scripts other than `train_network.py`.
- Add `--max_grad_norm` option to each training script for gradient clipping; `0.0` disables clipping.
- Symbolic links can now be loaded in each training script. Thanks to TkskKurumi!
* 2023/02/19 (v20.7.4):
- Add `--use_lion_optimizer` to each training script to use [Lion optimizer](https://github.com/lucidrains/lion-pytorch).
- Please install the Lion optimizer with `pip install lion-pytorch` (it is not in `requirements.txt` currently).
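
To make the new option handling concrete, here is a minimal, hypothetical sketch of how an optimizer class could be resolved from these flags. This is not the actual `train_util.get_optimizer` implementation (which also returns the optimizer name and its arguments); the resolution order and the SGDNesterov momentum value are assumptions, while the package and class names (`bitsandbytes`, `lion-pytorch`, `dadaptation`) are the publicly documented ones.

```python
import torch


def resolve_optimizer_class(args):
    # Hypothetical sketch only -- not the real train_util.get_optimizer.
    # The legacy flags override --optimizer_type for backward compatibility,
    # as described in the changelog entry above.
    optimizer_type = (args.optimizer_type or "AdamW").lower()
    if args.use_8bit_adam:
        optimizer_type = "adamw8bit"
    elif args.use_lion_optimizer:
        optimizer_type = "lion"

    if optimizer_type == "adamw8bit":
        import bitsandbytes as bnb  # pip install bitsandbytes
        return bnb.optim.AdamW8bit
    if optimizer_type == "lion":
        import lion_pytorch  # pip install lion-pytorch
        return lion_pytorch.Lion
    if optimizer_type == "dadaptation":
        import dadaptation  # pip install dadaptation (not in requirements.txt)
        return dadaptation.DAdaptAdam
    if optimizer_type == "sgdnesterov":
        # SGD with Nesterov momentum; the momentum value here is an assumption.
        return lambda params, lr: torch.optim.SGD(params, lr=lr, momentum=0.9, nesterov=True)
    return torch.optim.AdamW  # default


# Usage (with an argparse Namespace holding the flags above):
# optimizer_class = resolve_optimizer_class(args)
# optimizer = optimizer_class(params_to_optimize, lr=args.learning_rate)
```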
39 changes: 12 additions & 27 deletions fine_tune.py
@@ -149,27 +149,7 @@ def fn_recursive_set_mem_eff(module: torch.nn.Module):

# Prepare the classes needed for training
print("prepare optimizer, data loader etc.")

# Use 8-bit Adam
if args.use_8bit_adam:
    try:
        import bitsandbytes as bnb
    except ImportError:
        raise ImportError("No bitsandbytes / bitsandbytesがインストールされていないようです")
    print("use 8-bit Adam optimizer")
    optimizer_class = bnb.optim.AdamW8bit
elif args.use_lion_optimizer:
    try:
        import lion_pytorch
    except ImportError:
        raise ImportError("No lion_pytorch / lion_pytorch がインストールされていないようです")
    print("use Lion optimizer")
    optimizer_class = lion_pytorch.Lion
else:
    optimizer_class = torch.optim.AdamW

# beta and weight decay appear to use the default values in both diffusers DreamBooth and DreamBooth SD, so those options are omitted for now
optimizer = optimizer_class(params_to_optimize, lr=args.learning_rate)
_, _, optimizer = train_util.get_optimizer(args, trainable_params=params_to_optimize)

# Prepare the dataloader
# Number of DataLoader worker processes: 0 means the main process
@@ -183,8 +163,9 @@ def fn_recursive_set_mem_eff(module: torch.nn.Module):
print(f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}")

# Prepare the lr scheduler
lr_scheduler = diffusers.optimization.get_scheduler(
    args.lr_scheduler, optimizer, num_warmup_steps=args.lr_warmup_steps, num_training_steps=args.max_train_steps * args.gradient_accumulation_steps)
lr_scheduler = train_util.get_scheduler_fix(args.lr_scheduler, optimizer, num_warmup_steps=args.lr_warmup_steps,
                                            num_training_steps=args.max_train_steps * args.gradient_accumulation_steps,
                                            num_cycles=args.lr_scheduler_num_cycles, power=args.lr_scheduler_power)

# Experimental feature: perform fp16 training including gradients; cast the entire model to fp16
if args.full_fp16:
@@ -286,11 +267,11 @@ def fn_recursive_set_mem_eff(module: torch.nn.Module):
loss = torch.nn.functional.mse_loss(noise_pred.float(), target.float(), reduction="mean")

accelerator.backward(loss)
if accelerator.sync_gradients:
if accelerator.sync_gradients and args.max_grad_norm != 0.0:
    params_to_clip = []
    for m in training_models:
        params_to_clip.extend(m.parameters())
    accelerator.clip_grad_norm_(params_to_clip, 1.0)  # args.max_grad_norm)
    accelerator.clip_grad_norm_(params_to_clip, args.max_grad_norm)

optimizer.step()
lr_scheduler.step()
@@ -303,9 +284,12 @@ def fn_recursive_set_mem_eff(module: torch.nn.Module):

current_loss = loss.detach().item()  # this is a mean, so batch size should not matter
if args.logging_dir is not None:
logs = {"loss": current_loss, "lr": lr_scheduler.get_last_lr()[0]}
logs = {"loss": current_loss, "lr": float(lr_scheduler.get_last_lr()[0])}
if args.optimizer_type.lower() == "DAdaptation".lower(): # tracking d*lr value
logs["lr/d*lr"] = lr_scheduler.optimizers[0].param_groups[0]['d']*lr_scheduler.optimizers[0].param_groups[0]['lr']
accelerator.log(logs, step=global_step)

# TODO moving averageにする
loss_total += current_loss
avr_loss = loss_total / (step+1)
logs = {"loss": avr_loss} # , "lr": lr_scheduler.get_last_lr()[0]}
@@ -315,7 +299,7 @@ def fn_recursive_set_mem_eff(module: torch.nn.Module):
break

if args.logging_dir is not None:
logs = {"epoch_loss": loss_total / len(train_dataloader)}
logs = {"loss/epoch": loss_total / len(train_dataloader)}
accelerator.log(logs, step=epoch+1)

accelerator.wait_for_everyone()
@@ -351,6 +335,7 @@ def fn_recursive_set_mem_eff(module: torch.nn.Module):
train_util.add_dataset_arguments(parser, False, True, True)
train_util.add_training_arguments(parser, False)
train_util.add_sd_saving_arguments(parser)
train_util.add_optimizer_arguments(parser)

parser.add_argument("--diffusers_xformers", action='store_true',
help='use xformers by diffusers / Diffusersでxformersを使用する')
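
For reference, the `lr/d*lr` value logged in the training loop above multiplies D-Adaptation's current step-size estimate `d` (which the optimizer stores in each param group, as the diff shows) by the configured `lr`. Below is a minimal standalone sketch of the same calculation, assuming the `dadaptation` package is installed; it is illustrative only, not part of the training scripts.

```python
import torch
import dadaptation  # pip install dadaptation

# Toy parameter and optimizer; D-Adaptation keeps its estimate `d` in the param group.
params = [torch.nn.Parameter(torch.randn(4, 4))]
optimizer = dadaptation.DAdaptAdam(params, lr=1.0)

group = optimizer.param_groups[0]
print("effective lr (d * lr):", group["d"] * group["lr"])  # the quantity logged as "lr/d*lr"
```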