Fix some fine-tuning bugs #1079
zRzRzRzRzRzRzR committed Apr 2, 2024
1 parent 92aff98 commit 6e00a6c
Showing 3 changed files with 9 additions and 2 deletions.
finetune_demo/finetune_hf.py: 2 additions & 1 deletion
@@ -513,6 +513,7 @@ def main(
    ]
    model.gradient_checkpointing_enable()
    model.enable_input_require_grads()
+
    trainer = Seq2SeqTrainer(
        model=model,
        args=ft_config.training_args,
@@ -523,7 +524,7 @@
        ),
        train_dataset=train_dataset,
        eval_dataset=val_dataset.select(list(range(50))),
-       # tokenizer=tokenizer, # to avoid the wrong of the tokenizer
+       tokenizer=tokenizer if ft_config.peft_config.peft_type != "LORA" else None,  # LORA does not need tokenizer
        compute_metrics=functools.partial(compute_metrics, tokenizer=tokenizer),
    )

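For context, a minimal sketch of the tokenizer guard in isolation. The model name and LoRA hyperparameters below are illustrative placeholders, not taken from this repo; note that peft's PeftType subclasses str, so the comparison against "LORA" behaves as expected:

from peft import LoraConfig
from transformers import AutoTokenizer

# Illustrative setup; the model name and LoRA settings are placeholders.
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)
peft_config = LoraConfig(r=8, lora_alpha=32, target_modules=["query_key_value"])

# The commit's guard: per its comment, LoRA fine-tuning does not need the
# tokenizer passed to the trainer, so LoRA runs hand it None instead.
trainer_tokenizer = tokenizer if peft_config.peft_type != "LORA" else None
assert trainer_tokenizer is None  # peft_config.peft_type is PeftType.LORA here
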
openai_api_demo/api_server.py: 4 additions & 1 deletion
@@ -23,6 +23,9 @@
Users need to configure their special tokens and can enable multi-GPU support as per the provided instructions.
Embedding models are supported on a single GPU only.
Running this script requires 14-15 GB of GPU memory: about 2 GB for the embedding model and 12-13 GB for the FP16 ChatGLM3 LLM.
"""

import os
@@ -52,7 +55,7 @@
TOKENIZER_PATH = os.environ.get("TOKENIZER_PATH", MODEL_PATH)

# set Embedding Model path
-EMBEDDING_PATH = os.environ.get('EMBEDDING_PATH', 'BAAI/bge-large-zh-v1.5')
+EMBEDDING_PATH = os.environ.get('EMBEDDING_PATH', 'BAAI/bge-m3')


@asynccontextmanager
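
To sanity-check the new default locally, the model can be loaded with the same env-var override the server uses; a hedged sketch assuming sentence-transformers, which supports the BAAI/bge-m3 checkpoint:

import os
from sentence_transformers import SentenceTransformer

# Same override mechanism as the server: the env var wins, bge-m3 is the default.
EMBEDDING_PATH = os.environ.get("EMBEDDING_PATH", "BAAI/bge-m3")

model = SentenceTransformer(EMBEDDING_PATH)
vectors = model.encode(["hello", "你好"], normalize_embeddings=True)
print(vectors.shape)  # (2, 1024): bge-m3 produces 1024-dimensional embeddings
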
openai_api_demo/langchain_openai_api.py: 3 additions & 0 deletions
@@ -6,6 +6,7 @@
to start the GLM3 model's service.
2. Run the Script: The script includes functionality for initializing the LLMChain object and obtaining AI responses,
allowing the user to input questions and receive AI answers.
"""
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
@@ -23,6 +24,8 @@ def initialize_llm_chain(messages: list):
        max_tokens=8096,
        prefix_messages=messages,
        top_p=0.9,
+       streaming=True,  # Set to True for streaming completions
+
    )
return LLMChain(prompt=prompt, llm=llm)

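With streaming enabled, tokens can be consumed as they arrive by attaching a callback handler. A minimal sketch assuming the OpenAI-compatible endpoint that api_server.py exposes; the URL, API key, and model name below are placeholders, not taken from the commit:

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chat_models import ChatOpenAI

# Point the OpenAI-compatible client at the local api_server.py endpoint;
# all connection details here are illustrative.
llm = ChatOpenAI(
    openai_api_base="http://127.0.0.1:8000/v1",
    openai_api_key="EMPTY",
    model_name="chatglm3-6b",
    streaming=True,  # mirrors the flag added in this commit
    callbacks=[StreamingStdOutCallbackHandler()],
)
llm.predict("What can you do?")  # tokens are printed incrementally as they stream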
