Skip to content

Commit

Permalink
把tokenizer_decode_token__output_buffer和tokenizer_encode_string__output_buffer在多线程环境中可能出错的情况进行了修改
Browse files Browse the repository at this point in the history
  • Loading branch information
aofengdaxia committed Jan 29, 2024
1 parent 97cb612 commit f79b6da
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions tools/fastllm_pytools/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,8 @@ def __init__ (self, path : str,

# 为了减少重复申请释放buffer对象而使用的线程局部存储区对象池
self.thread_local_obj = threading.local()
self.thread_local_obj.tokenizer_encode_string__output_buffer = None
self.thread_local_obj.tokenizer_decode_token__output_buffer = None
#self.thread_local_obj.tokenizer_encode_string__output_buffer = None
#self.thread_local_obj.tokenizer_decode_token__output_buffer = None

# tokenizer_decode_token 输出结果的静态缓存,手工触发构建
# 由于token数量有限且不太多,所以缓存该结果来减少调用较为适合。
Expand Down Expand Up @@ -154,7 +154,7 @@ def build_tokenizer_decode_token_cache(self):

def tokenizer_encode_string(self, content: str) -> List[int]:
output_buffer_init_len = 1024
if self.thread_local_obj.tokenizer_encode_string__output_buffer is None:
if "tokenizer_encode_string__output_buffer" not in self.thread_local_obj or self.thread_local_obj.tokenizer_encode_string__output_buffer is None:
self.thread_local_obj.tokenizer_encode_string__output_buffer = (ctypes.c_int * output_buffer_init_len)()

buffer = self.thread_local_obj.tokenizer_encode_string__output_buffer
Expand Down

0 comments on commit f79b6da

Please sign in to comment.