forked from mst272/LLM-Dojo
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
update command line run and data process
- Loading branch information
Showing
7 changed files
with
91 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,6 @@ | ||
{"message": [{"role": "system", "content": "You are a friendly chatbot who always responds in the style of a pirate"},{"role": "user", "content": "How many helicopters can a human eat in one sitting"},{"role": "assistant", "content": "Sure! Here are some ways to eat bananas and dragonfruits together"},{"role": "user", "content": "How many hel"},{"role": "assistant", "content": "Snogether"}]} | ||
{"message": [{"role": "user", "content": "How many helicopters can a human eat in one sitting"},{"role": "assistant", "content": "Sure! Here are some ways to eat bananas and dragonfruits together"},{"role": "user", "content": "你是谁"},{"role": "assistant", "content": "我是一个智能机器人 "}]} | ||
{"message": [{"role": "user", "content": "Create a function to calculate the sum of a sequence of integers."}, {"role": "assistant", "content": "# Python code def sum_sequence(sequence): sum = 0 for num in sequence: sum += num return sum"}]} | ||
{"message": [{"role": "user", "content": "Generate a Python code for crawling a website for a specific type of data."}, {"role": "assistant", "content": "import requests import re def crawl_website_for_phone_numbers(website): response = requests.get(website) phone_numbers = re.findall('\\d{3}-\\d{3}-\\d{4}', response.text) return phone_numbers if __name__ == '__main__': print(crawl_website_for_phone_numbers('www.example.com'))"}]} | ||
{"message": [{"role": "user", "content": "Create a nested loop to print every combination of numbers between 0-9, excluding any combination that contains the number 5. Additionally, exclude any combination that contains a repeating digit. Implement the solution without using any built-in functions or libraries to check for repeating digits."}, {"role": "assistant", "content": "Here is an example of a nested loop in Python to print every combination of numbers between 0-9, excluding any combination that contains the number 5 or repeating digits: ```python for i in range(10): # First digit for j in range(10): # Second digit for k in range(10): # Third digit # Checking for the conditions if i != 5 and j != 5 and k != 5 and i != j and i != k and j != k: print(i, j, k) ``` This code will generate and print every combination of three digits between 0-9 that do not contain the number 5 and do not have any repeating digits."}]} | ||
{"message": [{"role": "user", "content": "Generate a Python program that adds two then doubles the result."}, {"role": "assistant", "content": "def double_add(num1, num2): return (num1 + num2) * 2"}]} | ||
{"message": [{"role": "user", "content": "Develop a function in Python that prints out the Pascal's triangle for a given number of rows."}, {"role": "assistant", "content": "def pascal_triangle(n): trow = [1] y = [0] for x in range(max(n, 0)): print(trow) trow=[l+r for l,r in zip(trow + y, y + trow)] return n>=1 pascal_triangle(5)"}]} | ||
{"message": [{"role": "user", "content": "Write a Python program to find all the numbers divisible by a certain number within a certain range."}, {"role": "assistant", "content": "def find_numbers_divisible_by(m, n, divisor): divisible_numbers = [] for i in range(m, n+1): if i % divisor == 0: divisible_numbers.append(i) return divisible_numbers"}]} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import pandas as pd | ||
|
||
ORIGINAL_DATA_PATH = './1.jsonl' # 原数据路径 | ||
OUT_DATA_PATH = './out_data.jsonl' # 转换为框架适用的role content模式的输出路径 | ||
|
||
|
||
data1 = pd.read_json(ORIGINAL_DATA_PATH, lines=True) | ||
# 创建一个空的列表来存储处理后的数据 | ||
processed_data = [] | ||
# 迭代每一行数据 | ||
for index, row in data1.iterrows(): | ||
message = [ | ||
{"role": "user", "content": row['instruction']}, | ||
{"role": "assistant", "content": row['output']} | ||
] | ||
processed_data.append({"message": message}) | ||
# 将处理后的数据转换为 DataFrame | ||
processed_df = pd.DataFrame(processed_data) | ||
# 保存为jsonl格式 | ||
processed_df.to_json(OUT_DATA_PATH, orient='records', lines=True, force_ascii=False) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,35 @@ | ||
|
||
DATA_PATH='' | ||
OUTPUT_PATH="" | ||
MODEL_PATH="" | ||
|
||
# deepspeed 启动 | ||
deepspeed --include localhost:0,1 main_train.py\ | ||
--train_data_path 数据集路径\ | ||
--model_name_or_path 模型路径\ | ||
--task_type sft\ | ||
--train_mode qlora\ | ||
--output_dir 输出路径 | ||
--train_data_path "$DATA_PATH" \ | ||
--model_name_or_path "$MODEL_PATH" \ | ||
--max_len 1024 \ | ||
--num_train_epochs 1 \ | ||
--per_device_train_batch_size 8 \ | ||
--per_device_eval_batch_size 1 \ | ||
--gradient_accumulation_steps 4 \ | ||
--task_type "sft" \ | ||
--train_mode "qlora" \ | ||
--output_dir "$OUTPUT_PATH" \ | ||
--save_strategy "steps" \ | ||
--save_steps 500 \ | ||
--save_total_limit 5 \ | ||
--learning_rate 2e-4 \ | ||
--warmup_steps 10 \ | ||
--logging_steps 1 \ | ||
--lr_scheduler_type "cosine_with_min_lr" \ | ||
--gradient_checkpointing True \ | ||
--report_to "wandb" \ | ||
--deepspeed './train_args/deepspeed_config/ds_config_zero2.json' \ | ||
--bf16 True | ||
|
||
# task_type:[pretrain, sft, dpo_multi, dpo_single] | ||
# train_mode:[qlora, lora, full] | ||
|
||
|
||
|
||
# python main_train.py --train_data_path 数据集路径 --model_name_or_path 模型路径 | ||
# python main_train.py --train_data_path 数据集路径 --model_name_or_path 模型路径 ......同上述传入参数 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters