Commit

[hotfix] fix gpt gemini example (hpcaitech#2404)
* [hotfix] fix gpt gemini example

* [example] add new assertions
1SAA authored Jan 9, 2023
1 parent 9880fd2 commit 498b5ca
Showing 2 changed files with 18 additions and 14 deletions.
30 changes: 16 additions & 14 deletions examples/language/gpt/gemini/benchmark_gemini.sh
@@ -1,18 +1,20 @@
 for MODEL_TYPE in "gpt2_medium"; do
-for BATCH_SIZE in 16; do
-for GPUNUM in 1 2 4 8; do
-for TPDEGREE in 1 2 4 8; do
-if [ ${TPDEGREE} -gt ${GPUNUM} ]; then
-continue
-fi
-for PLACEMENT in "cpu" "auto"; do
-echo "****************** Begin ***************************"
-echo "* benchmrking MODEL_TYPE ${MODEL_TYPE} BS ${BATCH_SIZE} BS ${BS} GPUNUM ${GPUNUM} TPDEGREE ${TPDEGREE} PLACEMENT ${PLACEMENT}"
-MODEL_TYPE=${MODEL_TYPE} BATCH_SIZE=${BATCH_SIZE} GPUNUM=${GPUNUM} TPDEGREE=${TPDEGREE} PLACEMENT=${PLACEMENT} \
-bash ./gemini/run_gemini.sh
-echo "****************** Finished ***************************"
-echo ""
-echo ""
+for DISPAN in "colossalai"; do
+for BATCH_SIZE in 16; do
+for GPUNUM in 1 2 4 8; do
+for TPDEGREE in 1 2 4 8; do
+if [ ${TPDEGREE} -gt ${GPUNUM} ]; then
+continue
+fi
+for PLACEMENT in "cpu" "auto"; do
+echo "****************** Begin ***************************"
+echo "+ benchmrking MODEL ${MODEL_TYPE} DISPAN ${DISPAN} GPU ${GPUNUM} BS ${BATCH_SIZE} TP ${TPDEGREE} POLICY ${PLACEMENT}"
+MODEL_TYPE=${MODEL_TYPE} DISPAN=${DISPAN} BATCH_SIZE=${BATCH_SIZE} GPUNUM=${GPUNUM} TPDEGREE=${TPDEGREE} PLACEMENT=${PLACEMENT} \
+bash ./run_gemini.sh
+echo "****************** Finished ***************************"
+echo ""
+echo ""
+done
 done
 done
 done
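The rewritten loop hands each configuration to run_gemini.sh through environment variables, so a single configuration can also be reproduced by hand. A minimal sketch, assuming run_gemini.sh reads exactly the variables set in the loop above (the values chosen here are illustrative, not from the diff):

# Reproduce one benchmark configuration directly, from examples/language/gpt/gemini.
# All variable names come from the loop above; the values are arbitrary examples.
MODEL_TYPE=gpt2_medium DISPAN=colossalai BATCH_SIZE=16 \
GPUNUM=2 TPDEGREE=2 PLACEMENT=auto \
bash ./run_gemini.sh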
2 changes: 2 additions & 0 deletions examples/language/gpt/gemini/train_gpt_demo.py
@@ -270,6 +270,7 @@ def main():
 
         tp_pg = ProcessGroup(tp_degree=args.tp_degree)
         # Tensor Parallelism (TP)
+        # You should notice that v0.1.10 is not compatible with TP degree > 1
         tensor_parallelize(model, tp_pg)
 
         # build a Gemini model and a highly optimized cpu optimizer
@@ -278,6 +279,7 @@ def main():
 
         logger.info(get_mem_info(prefix='After init optim, '), ranks=[0])
     else:
+        assert args.tp_degree == 1, "The degree of TP should be 1 for DDP examples."
         model = model_builder(args.model_type)(checkpoint=True).cuda()
 
     if args.distplan.startswith("torch"):
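The new assertion turns a silent misconfiguration into an immediate failure: tensor parallelism is only wired up on the colossalai path, so every other plan must keep the TP degree at 1. A hedged sketch of the resulting behavior, assuming train_gpt_demo.py exposes args.distplan and args.tp_degree as --distplan and --tp_degree flags, that "torch_ddp" is one of the non-colossalai plan values, and that the demo is launched with torchrun (all inferred from the code, not confirmed by this diff):

# Hypothetical invocations illustrating the new guard; flag names, the
# "torch_ddp" value, and the torchrun launcher are assumptions inferred
# from args.distplan / args.tp_degree in the hunk above.
torchrun --nproc_per_node=1 train_gpt_demo.py --distplan torch_ddp --tp_degree 1  # passes the new assert
torchrun --nproc_per_node=1 train_gpt_demo.py --distplan torch_ddp --tp_degree 2  # fails fast: "The degree of TP should be 1 for DDP examples."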
