Skip to content

Commit

Permalink
Update comments in modelconfigs and synchronous loop
Browse files Browse the repository at this point in the history
  • Loading branch information
lightvector committed May 22, 2023
1 parent 2af6abf commit 3f83026
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 19 deletions.
20 changes: 9 additions & 11 deletions python/modelconfigs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1351,20 +1351,20 @@ def get_num_global_input_features(config: ModelConfig):


base_config_of_name = {
# Small nets
# Micro-sized model configs
"b1c6nbt": b1c6nbt,
"b2c16": b2c16,
"b4c32": b4c32,
"b6c96": b6c96,

# Configs not too different in inference cost from b10c128
# Small model configs, not too different in inference cost from b10c128
"b10c128": b10c128,
"b5c192nbt": b5c192nbt,

# Configs not too different in inference cost from b15c192
# Medium model configs, not too different in inference cost from b15c192
"b15c192": b15c192,

# Configs not too different in inference cost from b20c256
# Roughly AlphaZero-sized, not too different in inference cost from b20c256
"b20c256": b20c256,
"b30c256bt": b30c256bt,
"b24c320bt": b24c320bt,
Expand All @@ -1374,26 +1374,24 @@ def get_num_global_input_features(config: ModelConfig):
"b15c384lbt": b15c384lbt,
"b10c512lbt": b10c512lbt,
"b12c384llbt": b12c384llbt,
"b10c384nbt": b10c384nbt, # Recommended
"b10c384nbt": b10c384nbt, # Recommended best config for this cost
"b10c480nb3t": b10c480nb3t,
"b7c384lnbt": b7c384lnbt,
"b5c512nnbt": b5c512nnbt,

# Oddball config
"b20c384lbt": b20c384lbt,

# Configs not too different in inference cost from b40c256
# Roughly AlphaGoZero-sized, not too different in inference cost from b40c256
"b30c320": b30c320,
"b40c256": b40c256,
"b18c384nbt": b18c384nbt, # Recommended
"b18c384nbt": b18c384nbt, # Recommended best config for this cost
"b14c448nbt": b14c448nbt,

# Configs not too different in inference cost from b60c320
# Large model configs, not too different in inference cost from b60c320
"b40c384": b40c384,
"b60c320": b60c320,
"b41c384nbt": b41c384nbt,
"b32c448nbt": b32c448nbt,
"b28c512nbt": b28c512nbt, # Recommended
"b28c512nbt": b28c512nbt, # Recommended best config for this cost
"b20c640nbt": b20c640nbt,

"sandbox": sandbox,
Expand Down
20 changes: 12 additions & 8 deletions python/selfplay/synchronous_loop.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ set -o pipefail
# Assumes you have the cpp directory compiled and the katago executable is there.

# If using multiple machines, or even possibly many GPUs on one machine in some cases, then this is NOT the
# recommended method; instead, it is better to run all steps simultaneously and asynchronously. See SelfplayTraining.md in
# the root of the KataGo repo for more details.

if [[ $# -lt 5 ]]
then
echo "Usage: $0 NAMEPREFIX BASEDIR TRAININGNAME MODELKIND USEGATING"
echo "Assumes katago is built in the 'cpp' directory of the KataGo repo and the executable is present at cpp/katago."
echo "Assumes katago is already built in the 'cpp' directory of the KataGo repo and the executable is present at cpp/katago."
echo "NAMEPREFIX string prefix for this training run, try to pick something globally unique. Will be displayed to users when KataGo loads the model."
echo "BASEDIR containing selfplay data and models and related directories"
echo "TRAININGNAME name to prefix models with, specific to this training daemon"
Expand All @@ -32,29 +32,32 @@ USEGATING="$1"
shift

BASEDIR="$(realpath "$BASEDIRRAW")"

GITROOTDIR="$(git rev-parse --show-toplevel)"

LOGSDIR="$BASEDIR"/logs
SCRATCHDIR="$BASEDIR"/shufflescratch

# Create all the directories we need
mkdir -p "$BASEDIR"
mkdir -p "$LOGSDIR"
mkdir -p "$SCRATCHDIR"
mkdir -p "$BASEDIR"/selfplay
mkdir -p "$BASEDIR"/gatekeepersgf

# NOTE: You probably want to edit settings in the cpp/configs/selfplay1.cfg - what board sizes and rules, you want to learn, number of visits to use, etc.
# NOTE: You may want to adjust these numbers.
# Parameters for the training run
# NOTE: You may want to adjust the below numbers.
# NOTE: You probably want to edit settings in the cpp/configs/training/selfplay1.cfg
# Such as what board sizes and rules you want to learn, number of visits to use, etc.
NUM_GAMES_PER_CYCLE=1000
NUM_THREADS_FOR_SHUFFLING=8
NUM_TRAIN_SAMPLES_PER_CYCLE=500000
NUM_TRAIN_SAMPLES_PER_SWA=200000
BATCHSIZE=128 # KataGo normally uses batch size 256, and you can do that too, but for lower-end GPUs 64 or 128 may be needed to avoid running out of memory.
BATCHSIZE=128 # For lower-end GPUs 64 or smaller may be needed to avoid running out of GPU memory.
SHUFFLE_MINROWS=80000
SHUFFLE_KEEPROWS=600000 # A little larger than NUM_TRAIN_SAMPLES_PER_CYCLE
SELFPLAY_CONFIG="$GITROOTDIR"/cpp/configs/training/selfplay1.cfg
GATING_CONFIG="$GITROOTDIR"/cpp/configs/training/gatekeeper1.cfg

# Copy all the relevant scripts and configs and the katago executable to a dated directory.
# For archival and logging purposes - you can look back and see exactly the python code on a particular date
DATE_FOR_FILENAME=$(date "+%Y%m%d-%H%M%S")
DATED_ARCHIVE="$BASEDIR"/scripts/dated/"$DATE_FOR_FILENAME"
Expand All @@ -67,9 +70,10 @@ git show --no-patch --no-color > "$DATED_ARCHIVE"/version.txt
git diff --no-color > "$DATED_ARCHIVE"/diff.txt
git diff --staged --no-color > "$DATED_ARCHIVE"/diffstaged.txt

# Also run the code out of the archive, so that we don't unexpectedly crash or have changes if the local repo changes.
# Also run the code out of the archive, so that we don't unexpectedly crash or change behavior if the local repo changes.
cd "$DATED_ARCHIVE"

# Begin looping forever, running each step in order.
set -x
while true
do
Expand Down

0 comments on commit 3f83026

Please sign in to comment.