
Commit 28d5366
Use fast tokenizer by default.
yizhongw committed Sep 22, 2023
1 parent 7040019 commit 28d5366
Showing 9 changed files with 64 additions and 13 deletions.
8 changes: 7 additions & 1 deletion eval/bbh/run_eval.py
@@ -168,7 +168,8 @@ def main(args):
         tokenizer_name_or_path=args.tokenizer_name_or_path,
         load_in_8bit=args.load_in_8bit,
         device_map="balanced_low_0" if torch.cuda.device_count() > 1 else "auto",
-        gptq_model=args.gptq
+        gptq_model=args.gptq,
+        use_fast_tokenizer=not args.use_slow_tokenizer,
     )

     performance = {}
@@ -224,6 +225,11 @@ def main(args):
         default=None,
         help="if specified, we will load the tokenizer from here."
     )
+    parser.add_argument(
+        "--use_slow_tokenizer",
+        action="store_true",
+        help="If given, we will use the slow tokenizer."
+    )
     parser.add_argument(
         "--openai_engine",
         type=str,
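Every eval script below repeats the same two-part change: a new --use_slow_tokenizer flag, whose negation is forwarded as use_fast_tokenizer to the model loader. A minimal self-contained sketch of that pattern (the loader is load_hf_lm_and_tokenizer from eval/utils.py, whose signature change closes this commit; the flag names are taken verbatim from the diffs):

import argparse

from eval.utils import load_hf_lm_and_tokenizer

parser = argparse.ArgumentParser()
parser.add_argument("--model_name_or_path", type=str, default=None)
parser.add_argument("--tokenizer_name_or_path", type=str, default=None)
parser.add_argument(
    "--use_slow_tokenizer",
    action="store_true",
    help="If given, we will use the slow tokenizer."
)
args = parser.parse_args()

# Fast is now the default, so the flag is an explicit opt-out rather than opt-in.
model, tokenizer = load_hf_lm_and_tokenizer(
    model_name_or_path=args.model_name_or_path,
    tokenizer_name_or_path=args.tokenizer_name_or_path,
    use_fast_tokenizer=not args.use_slow_tokenizer,
)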
8 changes: 7 additions & 1 deletion eval/codex_humaneval/run_eval.py
@@ -46,7 +46,8 @@ def main(args):
         load_in_8bit=args.load_in_8bit,
         # device map is determined by the number of gpus available.
         device_map="balanced_low_0" if torch.cuda.device_count() > 1 else "auto",
-        gptq_model=args.gptq
+        gptq_model=args.gptq,
+        use_fast_tokenizer=not args.use_slow_tokenizer,
     )

     # these stop sequences are those mentioned in the codex paper.
@@ -141,6 +142,11 @@ def main(args):
         default=None,
         help="If specified, we will load the tokenizer from here."
     )
+    parser.add_argument(
+        "--use_slow_tokenizer",
+        action="store_true",
+        help="If given, we will use the slow tokenizer."
+    )
     parser.add_argument(
         "--openai_engine",
         type=str,
8 changes: 7 additions & 1 deletion eval/gsm/run_eval.py
@@ -82,7 +82,8 @@ def main(args):
         tokenizer_name_or_path=args.tokenizer_name_or_path,
         load_in_8bit=args.load_in_8bit,
         device_map="balanced_low_0" if torch.cuda.device_count() > 1 else "auto",
-        gptq_model=args.gptq
+        gptq_model=args.gptq,
+        use_fast_tokenizer=not args.use_slow_tokenizer,
     )
     new_line_token = tokenizer.encode("\n", add_special_tokens=False)[-1]  # get the last token because the tokenizer may add space tokens at the start.
     outputs = generate_completions(
@@ -166,6 +167,11 @@ def main(args):
         default=None,
         help="if specified, we will load the tokenizer from here."
     )
+    parser.add_argument(
+        "--use_slow_tokenizer",
+        action="store_true",
+        help="If given, we will use the slow tokenizer."
+    )
     parser.add_argument(
         "--openai_engine",
         type=str,
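The new_line_token line in the first hunk of this file takes the last encoded id because SentencePiece-style tokenizers commonly emit a leading space piece when "\n" is encoded on its own. A short illustration of that behavior (the checkpoint name is only an example of a LLaMA-style tokenizer; exact ids depend on the vocabulary):

from transformers import AutoTokenizer

# Example checkpoint only; any LLaMA-style SentencePiece tokenizer behaves similarly.
tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b")
ids = tokenizer.encode("\n", add_special_tokens=False)
# ids may come back as e.g. [29871, 13]: a leading space piece followed by the
# newline token, so [-1] picks out the newline itself.
new_line_token = ids[-1]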
8 changes: 7 additions & 1 deletion eval/mmlu/run_eval.py
@@ -152,7 +152,8 @@ def main(args):
         tokenizer_name_or_path=args.tokenizer_name_or_path,
         load_in_8bit=args.load_in_8bit,
         device_map="balanced_low_0" if torch.cuda.device_count() > 1 else "auto",
-        gptq_model=args.gptq
+        gptq_model=args.gptq,
+        use_fast_tokenizer=not args.use_slow_tokenizer,
     )

     subjects = sorted(
@@ -268,6 +269,11 @@ def main(args):
         default=None,
         help="if specified, we will load the tokenizer from here."
     )
+    parser.add_argument(
+        "--use_slow_tokenizer",
+        action="store_true",
+        help="If given, we will use the slow tokenizer."
+    )
     parser.add_argument(
         "--openai_engine",
         type=str,
16 changes: 13 additions & 3 deletions eval/predict.py
@@ -39,10 +39,15 @@ def parse_args():
         type=str,
         help="Huggingface tokenizer name or path."
     )
+    parser.add_argument(
+        "--use_slow_tokenizer",
+        action="store_true",
+        help="If given, we will use the slow tokenizer."
+    )
     parser.add_argument(
         "--openai_engine",
         type=str,
-        help="OpenAI engine name.")
+        help="OpenAI engine name. This should be exclusive with `model_name_or_path`.")
     parser.add_argument(
         "--input_files",
         type=str,
@@ -146,7 +151,11 @@ def parse_args():
             raise ValueError("Either `messages` or `prompt` should be in the instance.")
         prompts.append(prompt)
     if args.use_vllm:
-        model = vllm.LLM(model=args.model_name_or_path)
+        model = vllm.LLM(
+            model=args.model_name_or_path,
+            tokenizer=args.tokenizer_name_or_path if args.tokenizer_name_or_path else args.model_name_or_path,
+            tokenizer_mode="slow" if args.use_slow_tokenizer else "auto",
+        )
         sampling_params = vllm.SamplingParams(
             temperature=args.temperature if args.do_sample else 0,
             top_p=args.top_p,
@@ -160,7 +169,8 @@ def parse_args():
             tokenizer_name_or_path=args.tokenizer_name_or_path,
             load_in_8bit=args.load_in_8bit,
             device_map="balanced_low_0" if torch.cuda.device_count() > 1 else "auto",
-            gptq_model=args.gptq
+            gptq_model=args.gptq,
+            use_fast_tokenizer=not args.use_slow_tokenizer,
         )
         outputs = generate_completions(
             model=model,
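On the vLLM branch above, the slow/fast choice is expressed through tokenizer_mode rather than use_fast_tokenizer: "auto" uses the fast tokenizer when one is available, while "slow" always forces the slow implementation. A standalone sketch of that branch configured as in the diff (the model path and prompt are placeholders):

import vllm

model = vllm.LLM(
    model="example/model-path",      # placeholder checkpoint
    tokenizer="example/model-path",  # defaults to the model path when not set
    tokenizer_mode="auto",           # "slow" would force the slow tokenizer
)
sampling_params = vllm.SamplingParams(temperature=0, top_p=1.0, max_tokens=512)
outputs = model.generate(["Hello, world!"], sampling_params)
print(outputs[0].outputs[0].text)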
11 changes: 8 additions & 3 deletions eval/toxigen/run_eval.py
@@ -50,6 +50,7 @@ def eval_vllm_model(
     model = vllm.LLM(
         model=args.model_name_or_path,
         tokenizer=tokenizer if tokenizer else args.model_name_or_path,
+        tokenizer_mode="slow" if args.use_slow_tokenizer else "auto",
     )
     prompts = []
     chat_formatting_function = dynamic_import_function(args.chat_formatting_function) if args.use_chat_format else None
@@ -273,10 +274,9 @@ def main(args):
         model_name_or_path=args.model_name_or_path,
         tokenizer_name_or_path=args.tokenizer_name_or_path,
         load_in_8bit=args.load_in_8bit,
-        device_map="balanced_low_0"
-        if torch.cuda.device_count() > 1
-        else "auto",
+        device_map="balanced_low_0" if torch.cuda.device_count() > 1 else "auto",
         gptq_model=args.gptq,
+        use_fast_tokenizer=not args.use_slow_tokenizer,
     )
     results = eval_hf_model(
         args,
@@ -322,6 +322,11 @@ def main(args):
         default=None,
         help="if specified, we will load the tokenizer from here.",
     )
+    parser.add_argument(
+        "--use_slow_tokenizer",
+        action="store_true",
+        help="If given, we will use the slow tokenizer."
+    )
     parser.add_argument(
         "--openai_engine",
         type=str,
8 changes: 7 additions & 1 deletion eval/truthfulqa/run_eval.py
@@ -339,7 +339,8 @@ def main(args):
         tokenizer_name_or_path=args.tokenizer_name_or_path,
         load_in_8bit=args.load_in_8bit,
         device_map="balanced_low_0" if torch.cuda.device_count() > 1 else "auto",
-        gptq_model=args.gptq
+        gptq_model=args.gptq,
+        use_fast_tokenizer=not args.use_slow_tokenizer,
     )
     print("Running generations!")
     run_answers(
@@ -426,6 +427,11 @@ def main(args):
         default=None,
         help="If specified, we will load the tokenizer from here."
     )
+    parser.add_argument(
+        "--use_slow_tokenizer",
+        action="store_true",
+        help="If given, we will use the slow tokenizer."
+    )
     parser.add_argument(
         "--openai_engine",
         type=str,
8 changes: 7 additions & 1 deletion eval/tydiqa/run_eval.py
@@ -103,7 +103,8 @@ def main(args):
             tokenizer_name_or_path=args.tokenizer_name_or_path,
             load_in_8bit=args.load_in_8bit,
             device_map="balanced_low_0" if torch.cuda.device_count() > 1 else "auto",
-            gptq_model=args.gptq
+            gptq_model=args.gptq,
+            use_fast_tokenizer=not args.use_slow_tokenizer,
         )
     else:
         import tiktoken
@@ -260,6 +261,11 @@ def main(args):
         default=None,
         help="if specified, we will load the tokenizer from here."
     )
+    parser.add_argument(
+        "--use_slow_tokenizer",
+        action="store_true",
+        help="If given, we will use the slow tokenizer."
+    )
     parser.add_argument(
         "--openai_engine",
         type=str,
2 changes: 1 addition & 1 deletion eval/utils.py
@@ -194,7 +194,7 @@ def load_hf_lm_and_tokenizer(
     load_in_8bit=False,
     convert_to_half=False,
     gptq_model=False,
-    use_fast_tokenizer=False,
+    use_fast_tokenizer=True,
     padding_side="left",
 ):

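The body of load_hf_lm_and_tokenizer is not part of this diff; presumably the parameter is forwarded to AutoTokenizer.from_pretrained's use_fast argument, so the flipped default now selects the Rust-backed tokenizers implementation unless --use_slow_tokenizer is given. A hedged sketch of that forwarding (the helper below is illustrative, not the repository's actual function body):

from transformers import AutoTokenizer

def load_tokenizer(name_or_path, use_fast_tokenizer=True, padding_side="left"):
    # Assumed forwarding; the commit only shows the signature change, not the body.
    tokenizer = AutoTokenizer.from_pretrained(name_or_path, use_fast=use_fast_tokenizer)
    tokenizer.padding_side = padding_side
    return tokenizer

tok = load_tokenizer("gpt2")
print(tok.is_fast)  # True under the new default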
