# This default config file shows all arguments.
# NOTE: If your custom config file doesn't include all of the following arguments (not counting the advanced FL methods'),
# the defaults can be found in src/utils/constants.py
# The method name must be identical to the file name in src/server/<method>.py
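# Example (illustrative, not part of the defaults): a custom config file can be as small as
#   method: fedprox
#   dataset:
#     name: cifar10
# and every argument it omits falls back to the defaults below (or to src/utils/constants.py).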
method: fedavg
dataset:
  # [mnist, cifar10, cifar100, emnist, fmnist, femnist, medmnist,
  #  medmnistA, medmnistC, covid19, celeba, synthetic, svhn,
  #  tiny_imagenet, cinic10, domain]
  name: mnist
model:
  name: lenet5
  # Whether to use torchvision integrated model weights.
  # Has no effect if model is lenet5, 2nn or fedavgcnn.
  use_torchvision_pretrained_weights: true
  # Relative path (from the FL-bench root directory) to the model parameters `.pt` file.
  # This feature is enabled only when `unique_model=False`,
  # which is pre-defined and fixed by each FL method.
  external_model_weights_path: null
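  # Example (hypothetical path, for illustration only):
  #   external_model_weights_path: out/lenet5_mnist.pt
  # i.e. a path given relative to the FL-bench root directory.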
# The learning rate scheduler used for client local training.
# Can be null if no lr_scheduler is needed.
lr_scheduler:
  name: null # [null, step, cosine, constant, plateau]
  step_size: 10 # step
  gamma: 0.1 # [step, plateau]
  T_max: 10 # cosine
  eta_min: 0 # cosine
  factor: 0.3334 # [constant, plateau]
  total_iters: 5 # constant
  mode: min # plateau
  patience: 10 # plateau
  threshold: 1.0e-4 # plateau
  threshold_mode: rel # plateau
  cooldown: 0 # plateau
  min_lr: 0 # plateau
  eps: 1.0e-8 # plateau
  last_epoch: -1
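# Example (illustrative): to enable a step scheduler that decays the client lr by 10x every 10 steps,
# you could set:
#   lr_scheduler:
#     name: step
#     step_size: 10
#     gamma: 0.1
# The remaining keys above only matter for the other schedulers.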
# The optimizer used for client local training.
optimizer:
  name: sgd # [sgd, adam, adamw, rmsprop, adagrad]
  lr: 0.01
  dampening: 0 # for SGD
  weight_decay: 0
  momentum: 0 # for [SGD, RMSprop]
  alpha: 0.99 # for RMSprop
  nesterov: false # for SGD
  betas: [0.9, 0.999] # for [Adam, AdamW]
  amsgrad: false # for [Adam, AdamW]
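# Example (illustrative; the lr value is only a placeholder): to train clients with Adam instead of SGD,
# you could set:
#   optimizer:
#     name: adam
#     lr: 0.001
#     betas: [0.9, 0.999]
#     weight_decay: 0
# SGD-only keys such as momentum and nesterov then have no effect.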
mode: serial # [serial, parallel]
# It's fine to keep these configs; if mode is 'serial', they will be ignored.
parallel:
  # The IP address of the selected Ray cluster.
  # Default is null, which means if there is no existing Ray cluster,
  # Ray will create a new cluster at the beginning of the experiment
  # and destroy it at the end.
  # More details can be found in https://docs.ray.io/en/latest/ray-core/api/doc/ray.init.html.
  ray_cluster_addr: null # [null, auto, local]
  # The amount of computational resources you allocate for your Ray cluster.
  # Default is null for all.
  num_cpus: null
  num_gpus: null
  # Should be set larger than 1, or the training mode falls back to `serial`.
  # Setting a larger `num_workers` can further boost efficiency,
  # but it also leaves each worker fewer computational resources.
  num_workers: 2
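# Example (illustrative; resource numbers should match your machine): to run 4 workers
# on a locally created Ray cluster sharing 1 GPU, you could set:
#   mode: parallel
#   parallel:
#     ray_cluster_addr: null
#     num_cpus: null
#     num_gpus: 1
#     num_workers: 4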
common:
  seed: 42 # Random seed of the run.
  join_ratio: 0.1 # Ratio of (clients each round) / (client num in total).
  global_epoch: 100 # Number of global epochs, also called communication rounds.
  local_epoch: 5 # Number of epochs of client local training.
  finetune_epoch: 0 # Number of epochs of clients fine-tuning their models before test.
  batch_size: 32 # Data batch size for client local training.
  test_interval: 100 # Round interval for performing tests on all test clients.
  # The ratio of stragglers (set in [0, 1]).
  # Stragglers do not perform full local training like normal clients;
  # their local epoch count is randomly selected from the range [straggler_min_local_epoch, local_epoch).
  straggler_ratio: 0
  straggler_min_local_epoch: 0
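  # Example (illustrative):
  #   straggler_ratio: 0.2
  #   straggler_min_local_epoch: 1
  # would make roughly 20% of this round's joined clients train for a random number of epochs
  # in [1, 5) (given local_epoch: 5 above), while the remaining clients train the full 5 epochs.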
  # How to deal with parameter buffers (in model.buffers()) of each client model.
  # global (default): buffers will be aggregated like other model parameters.
  # local: clients' buffers are isolated.
  # drop: clients will drop their buffers after training is done.
  buffers: global # [local, global, drop]
  # Set eval_<...> to true to perform evaluation on the <...> sets held by
  # this round's joined clients before and after local training.
  eval_test: true
  eval_val: false
  eval_train: false
  verbose_gap: 10 # Round interval for displaying clients' training performance in the terminal.
  monitor: null # [null, visdom, tensorboard]
  use_cuda: true # Whether to use CUDA for training.
  save_log: true # Whether to save log files in out/<method>/<start_time>.
  save_model: false # Whether to save model weights (*.pt) in out/<method>/<start_time>.
  save_fig: true # Whether to save the learning curve figure (*.png) in out/<method>/<start_time>.
  save_metrics: true # Whether to save metrics (*.csv) in out/<method>/<start_time>.
  # Whether to delete output files after the user presses `Ctrl + C`,
  # which indicates that the run is discardable.
  delete_useless_run: true
# You can also set specific arguments for advanced FL methods.
# FL-bench accesses FL method arguments via args.<method>.<arg>.
# The keys need to follow those set in `get_hyperparams()` of class <method>Server in src/server/<method>.py.
# FL-bench will ignore these arguments if they are not supported by the selected method,
# e.g., if you are running FedProx, then pfedsim arguments will be ignored.
fedprox:
  mu: 0.01
pfedsim:
  warmup_round: 0.5