Skip to content

Commit

Permalink
Merge pull request datawhalechina#297 from Zyvpeng/master
Browse files Browse the repository at this point in the history
修改之前qwen2-vl微调并没有真正输入图片的bug
  • Loading branch information
KMnO4-zx authored Nov 27, 2024
2 parents 342c2c3 + c060544 commit 1390d89
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 24 deletions.
56 changes: 44 additions & 12 deletions models/Qwen2-VL/04-Qwen2-VL-2B Lora 微调.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -244,28 +244,60 @@
" conversation = example[\"conversations\"]\n",
" input_content = conversation[0][\"value\"]\n",
" output_content = conversation[1][\"value\"]\n",
" \n",
" instruction = tokenizer(\n",
" f\"<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n<|im_start|>user\\n{input_content}<|im_end|>\\n<|im_start|>assistant\\n\",\n",
" add_special_tokens=False,\n",
" file_path = input_content.split(\"<|vision_start|>\")[1].split(\"<|vision_end|>\")[0] # 获取图像路径\n",
" messages = [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": [\n",
" {\n",
" \"type\": \"image\",\n",
" \"image\": f\"{file_path}\",\n",
" \"resized_height\": 280,\n",
" \"resized_width\": 280,\n",
" },\n",
" {\"type\": \"text\", \"text\": \"COCO Yes:\"},\n",
" ],\n",
" }\n",
" ]\n",
" text = processor.apply_chat_template(\n",
" messages, tokenize=False, add_generation_prompt=True\n",
" ) # 获取文本\n",
"    image_inputs, video_inputs = process_vision_info(messages)  # 获取图像数据(预处理过)\n",
" inputs = processor(\n",
" text=[text],\n",
" images=image_inputs,\n",
" videos=video_inputs,\n",
" padding=True,\n",
" return_tensors=\"pt\",\n",
" )\n",
" inputs = {key: value.tolist() for key, value in inputs.items()} #tensor -> list,为了方便拼接\n",
" instruction = inputs\n",
"\n",
" response = tokenizer(f\"{output_content}\", add_special_tokens=False)\n",
"\n",
"\n",
" input_ids = (\n",
" instruction[\"input_ids\"] + response[\"input_ids\"] + [tokenizer.pad_token_id]\n",
" instruction[\"input_ids\"][0] + response[\"input_ids\"] + [tokenizer.pad_token_id]\n",
" )\n",
" attention_mask = instruction[\"attention_mask\"] + response[\"attention_mask\"] + [1]\n",
"\n",
" attention_mask = instruction[\"attention_mask\"][0] + response[\"attention_mask\"] + [1]\n",
" labels = (\n",
" [-100] * len(instruction[\"input_ids\"])\n",
" + response[\"input_ids\"]\n",
" + [tokenizer.pad_token_id]\n",
" [-100] * len(instruction[\"input_ids\"][0])\n",
" + response[\"input_ids\"]\n",
" + [tokenizer.pad_token_id]\n",
" )\n",
" \n",
" if len(input_ids) > MAX_LENGTH: # 做一个截断\n",
" input_ids = input_ids[:MAX_LENGTH]\n",
" attention_mask = attention_mask[:MAX_LENGTH]\n",
" labels = labels[:MAX_LENGTH]\n",
" \n",
" return {\"input_ids\": input_ids, \"attention_mask\": attention_mask, \"labels\": labels}\n",
"\n",
" input_ids = torch.tensor(input_ids)\n",
" attention_mask = torch.tensor(attention_mask)\n",
" labels = torch.tensor(labels)\n",
" inputs['pixel_values'] = torch.tensor(inputs['pixel_values'])\n",
" inputs['image_grid_thw'] = torch.tensor(inputs['image_grid_thw']).squeeze(0) #由(1,h,w)变换为(h,w)\n",
" return {\"input_ids\": input_ids, \"attention_mask\": attention_mask, \"labels\": labels,\n",
" \"pixel_values\": inputs['pixel_values'], \"image_grid_thw\": inputs['image_grid_thw']}\n",
"\n",
"def predict(messages, model):\n",
" # 准备推理\n",
Expand Down
56 changes: 44 additions & 12 deletions models/Qwen2-VL/04-Qwen2-VL-2B Lora 微调.md
Original file line number Diff line number Diff line change
Expand Up @@ -193,28 +193,60 @@ def process_func(example):
conversation = example["conversations"]
input_content = conversation[0]["value"]
output_content = conversation[1]["value"]

instruction = tokenizer(
f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{input_content}<|im_end|>\n<|im_start|>assistant\n",
add_special_tokens=False,
file_path = input_content.split("<|vision_start|>")[1].split("<|vision_end|>")[0] # 获取图像路径
messages = [
{
"role": "user",
"content": [
{
"type": "image",
"image": f"{file_path}",
"resized_height": 280,
"resized_width": 280,
},
{"type": "text", "text": "COCO Yes:"},
],
}
]
text = processor.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True
) # 获取文本
    image_inputs, video_inputs = process_vision_info(messages)  # 获取图像数据(预处理过)
inputs = processor(
text=[text],
images=image_inputs,
videos=video_inputs,
padding=True,
return_tensors="pt",
)
inputs = {key: value.tolist() for key, value in inputs.items()} #tensor -> list,为了方便拼接
instruction = inputs

response = tokenizer(f"{output_content}", add_special_tokens=False)


input_ids = (
instruction["input_ids"] + response["input_ids"] + [tokenizer.pad_token_id]
instruction["input_ids"][0] + response["input_ids"] + [tokenizer.pad_token_id]
)
attention_mask = instruction["attention_mask"] + response["attention_mask"] + [1]

attention_mask = instruction["attention_mask"][0] + response["attention_mask"] + [1]
labels = (
[-100] * len(instruction["input_ids"])
+ response["input_ids"]
+ [tokenizer.pad_token_id]
[-100] * len(instruction["input_ids"][0])
+ response["input_ids"]
+ [tokenizer.pad_token_id]
)

if len(input_ids) > MAX_LENGTH: # 做一个截断
input_ids = input_ids[:MAX_LENGTH]
attention_mask = attention_mask[:MAX_LENGTH]
labels = labels[:MAX_LENGTH]

return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}

input_ids = torch.tensor(input_ids)
attention_mask = torch.tensor(attention_mask)
labels = torch.tensor(labels)
inputs['pixel_values'] = torch.tensor(inputs['pixel_values'])
inputs['image_grid_thw'] = torch.tensor(inputs['image_grid_thw']).squeeze(0) #由(1,h,w)变换为(h,w)
return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels,
"pixel_values": inputs['pixel_values'], "image_grid_thw": inputs['image_grid_thw']}

def predict(messages, model):
# 准备推理
Expand Down

0 comments on commit 1390d89

Please sign in to comment.