Truncate Single Conversation (mlc-ai#300)

This PR adds support for truncating a single conversation, preventing a crash when a single conversation contains too many tokens: instead of aborting with LOG(FATAL) once the prompt exceeds max_window_size, the prompt is now truncated to its most recent tokens, leaving mean_gen_len slots of the window free for generation.
Hzfengsy authored Jun 3, 2023
1 parent 417f0ac commit f6fa30c
Showing 1 changed file with 16 additions and 4 deletions: cpp/llm_chat.cc
@@ -434,12 +434,24 @@ class LLMChat {
     }
   }
   // keep system
-  all_prompt = GetConcatPrompt(prompts, 1, start_re_encode_pos);
+  if (this->conversation_.system.empty()) {
+    all_prompt = GetConcatPrompt(prompts, 0, start_re_encode_pos);
+  } else {
+    all_prompt = GetConcatPrompt(prompts, 1, start_re_encode_pos);
+  }
   encoded = this->tokenizer_->Encode(all_prompt);
   tokens.insert(tokens.end(), encoded.begin(), encoded.end());
 
-  if (tokens.size() + this->mean_gen_len_ >= this->max_window_size_) {
-    LOG(FATAL) << "Exceed max window length curr=" << tokens.size();
+  if (tokens.size() >= this->max_window_size_) {
+    LOG(WARNING)
+        << "The prompt tokens are more than `max_window_size`, the input will be truncated.";
+    ICHECK_GT(this->max_window_size_, this->mean_gen_len_);
+    std::vector<int32_t> truncated_tokens(
+        tokens.end() - (this->max_window_size_ - this->mean_gen_len_), tokens.end());
+    return truncated_tokens;
+  } else if (tokens.size() + this->mean_gen_len_ >= this->max_window_size_) {
+    LOG(WARNING)
+        << "The prompt tokens are too long and the generated text may be incomplete, due to "
+           "limited `max_window_size`. ";
   }
   return tokens;
 }
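For reference, below is a minimal standalone sketch of the truncation rule this commit introduces. The scaffolding is hypothetical: the free function TruncateTokens and the main driver are not part of the commit, and LLMChat's members max_window_size_ and mean_gen_len_ become plain parameters here. When the prompt alone already fills the window, only the last max_window_size - mean_gen_len tokens are kept; for example, with max_window_size = 2048 and mean_gen_len = 128, a 3000-token prompt shrinks to its most recent 1920 tokens.

// Hypothetical standalone sketch; not part of the commit.
#include <cassert>
#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

// Mirrors the new branch in LLMChat: if the prompt alone reaches the
// context window, keep only the most recent tokens and reserve
// mean_gen_len slots for the model's output.
std::vector<int32_t> TruncateTokens(const std::vector<int32_t>& tokens,
                                    size_t max_window_size,
                                    size_t mean_gen_len) {
  if (tokens.size() >= max_window_size) {
    assert(max_window_size > mean_gen_len);  // stands in for the ICHECK_GT
    return std::vector<int32_t>(
        tokens.end() - (max_window_size - mean_gen_len), tokens.end());
  }
  // Otherwise the tokens pass through unchanged; the real code only logs
  // a warning when tokens.size() + mean_gen_len would still overflow.
  return tokens;
}

int main() {
  std::vector<int32_t> tokens(3000);
  std::iota(tokens.begin(), tokens.end(), 0);  // token ids 0 .. 2999

  std::vector<int32_t> out =
      TruncateTokens(tokens, /*max_window_size=*/2048, /*mean_gen_len=*/128);
  // 2048 - 128 = 1920 tokens survive, namely ids 1080 .. 2999.
  std::cout << out.size() << " tokens kept, first id = " << out.front() << "\n";
  return 0;
}

Keeping the tail of the token stream rather than the head means the most recent conversation turns survive truncation, which is what the next generated reply depends on most.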
