# app.py — forked from Theigrams/g1
# 276 lines (228 loc) · 11.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
import base64
import json
import logging
import os
import time
import streamlit as st
from dotenv import load_dotenv
from llm.V4 import Chatbot, AppBaseModel
# Load environment variables via python-dotenv before the API settings are read below.
load_dotenv()
# Configure logging at INFO level with timestamp, level name and message.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
# Shared chat client used by make_api_call; key and endpoint are read from the environment.
client = Chatbot(api_key=os.getenv('OPENAI_API_KEY'), api_url=os.getenv('OPENAI_API_BASE'))
class StepResultModel(AppBaseModel):
    """Schema passed as ``response_model`` to ``client.ask`` for each reasoning step."""

    # Short heading of the step.
    title: str
    # Body text of the step.
    content: str
    # Per the system prompt: 'continue', 'reflect', or 'final_answer'.
    next_action: str
    # Per the system prompt: a number between 0 and 1.
    confidence: float
def make_api_call(messages, max_tokens, temperature=0.5, is_final_answer=False, model="gpt-4o",
                  max_attempts=3, retry_delay=1):
    """Ask the chat client for one JSON reasoning step, retrying on failure.

    Args:
        messages: Conversation so far; forwarded to ``client.ask`` as ``prompt``.
        max_tokens: Token limit forwarded to the client.
        temperature: Sampling temperature forwarded to the client.
        is_final_answer: Selects which fallback dict is returned when every
            attempt fails.
        model: Model name forwarded to the client.
        max_attempts: Total number of attempts; values below 1 are treated as 1.
        retry_delay: Seconds to sleep between failed attempts.

    Returns:
        The decoded JSON object (a dict) on success. If every attempt fails,
        an error dict with "title" and "content" keys, plus
        ``"next_action": "final_answer"`` when ``is_final_answer`` is false.
        This function never raises for API or parsing failures.
    """
    max_attempts = max(1, max_attempts)
    for attempt in range(max_attempts):
        try:
            logger.info(f"尝试进行API调用 (第 {attempt + 1}/{max_attempts} 次尝试)")
            content, _, _, _ = client.ask(
                model=model,
                prompt=messages,
                json_format=True,
                max_tokens=max_tokens,
                temperature=temperature,
                response_model=StepResultModel,
            )
            logger.info("API调用成功")
            logger.info(content)
            result = json.loads(content)
            # Callers index the result by key, so a JSON list/scalar is treated
            # as a failed attempt and retried instead of crashing downstream.
            if not isinstance(result, dict):
                raise ValueError(f"API返回的JSON不是对象: {type(result).__name__}")
            return result
        except Exception as e:
            # Broad catch is deliberate: this is the retry boundary, and callers
            # rely on always getting a dict back.
            logger.error(f"API调用失败 (第 {attempt + 1}/{max_attempts} 次尝试)。错误: {str(e)}")
            if attempt == max_attempts - 1:
                if is_final_answer:
                    logger.error(f"{max_attempts}次尝试后未能生成最终答案")
                    return {
                        "title": "错误",
                        "content": f"{max_attempts}次尝试后未能生成最终答案。错误: {str(e)}",
                    }
                logger.error(f"{max_attempts}次尝试后未能生成步骤")
                return {
                    "title": "错误",
                    "content": f"{max_attempts}次尝试后未能生成步骤。错误: {str(e)}",
                    # Tells the step loop that no further steps can be produced.
                    "next_action": "final_answer",
                }
            logger.info(f"等待{retry_delay}秒后重试")
            time.sleep(retry_delay)  # Wait before the next attempt.
def generate_response(prompt, max_steps=5, temperature=0.5, model="gpt-4o"):
    """Generate a step-by-step reasoning chain for ``prompt``, then a final answer.

    This is a generator. After each intermediate step it yields
    ``(steps, None, None)``; once the final answer is produced it yields
    ``(steps, total_thinking_time, full_response)``.

    Args:
        prompt: The user's question.
        max_steps: Upper bound on the number of reasoning steps. If the model
            signals 'final_answer' earlier, it is asked to keep analysing.
        temperature: Sampling temperature forwarded to ``make_api_call``.
        model: Model name forwarded to ``make_api_call``.

    Yields:
        Tuples ``(steps, total_thinking_time, full_response)`` where ``steps``
        is a list of ``(title, content, thinking_time_seconds)`` tuples. The
        same list object is yielded every time and grows between yields.
    """
    logger.info(f"正在为提示生成回答: {prompt}")
    messages = [
        {
            "role": "system",
            "content": """You are an AI assistant that explains your reasoning step by step, incorporating dynamic Chain of Thought (CoT), reflection, and verbal reinforcement learning. Follow these instructions:
1. Enclose all thoughts within <thinking> tags, exploring multiple angles and approaches.
2. Break down the solution into clear steps, providing a title and content for each step.
3. After each step, decide if you need another step or if you're ready to give the final answer.
4. Continuously adjust your reasoning based on intermediate results and reflections, adapting your strategy as you progress.
5. Regularly evaluate your progress, being critical and honest about your reasoning process.
6. Assign a quality score between 0.0 and 1.0 to guide your approach:
- 0.8+: Continue current approach
- 0.5-0.7: Consider minor adjustments
- Below 0.5: Seriously consider backtracking and trying a different approach
7. If unsure or if your score is low, backtrack and try a different approach, explaining your decision.
8. For mathematical problems, show all work explicitly using LaTeX for formal notation and provide detailed proofs.
9. Explore multiple solutions individually if possible, comparing approaches in your reflections.
10. Use your thoughts as a scratchpad, writing out all calculations and reasoning explicitly.
11. Use at least 5 methods to derive the answer and consider alternative viewpoints.
12. Be aware of your limitations as an AI and what you can and cannot do.
After every 3 steps, perform a detailed self-reflection on your reasoning so far, considering potential biases and alternative viewpoints.
Respond in JSON format with 'title', 'content', 'next_action' (either 'continue', 'reflect', or 'final_answer'), and 'confidence' (a number between 0 and 1) keys.
Example of a valid JSON response:
```json
{
"title": "Identifying Key Information",
"content": "To begin solving this problem, we need to carefully examine the given information and identify the crucial elements that will guide our solution process. This involves...",
"next_action": "continue",
"confidence": 0.8
}```
Your goal is to demonstrate a thorough, adaptive, and self-reflective problem-solving process, emphasizing dynamic thinking and learning from your own reasoning.""",
        },
        {"role": "user", "content": prompt},
    ]
    steps = []
    step_count = 1
    total_thinking_time = 0
    while True:
        logger.info(f"开始第 {step_count} 步")
        start_time = time.time()
        step_data = make_api_call(messages, 4096, temperature=temperature, model=model)
        thinking_time = time.time() - start_time
        total_thinking_time += thinking_time
        logger.info(f"第 {step_count} 步完成。思考时间: {thinking_time:.2f} 秒")
        steps.append((str(step_data["title"]), step_data["content"], thinking_time))
        messages.append({"role": "assistant", "content": json.dumps(step_data)})

        # A missing key is treated like 'continue' rather than raising KeyError.
        next_action = step_data.get("next_action")

        # max_steps is a hard ceiling: stop here even if the model never says
        # 'final_answer', otherwise this loop could run without bound.
        if step_count >= max_steps:
            logger.info("已达到最终答案或最大步骤数")
            break

        if next_action == "final_answer":
            # The model wants to finish early; push it to keep reasoning.
            messages.append({"role": "user",
                             "content": "Please continue your analysis with at least 5 more steps before providing the final answer."})
        elif next_action == 'reflect' or step_count % 3 == 0:
            messages.append({"role": "user",
                             "content": "Please perform a detailed self-reflection on your reasoning so far, considering potential biases and alternative viewpoints."})
        else:
            messages.append({"role": "user", "content": "Please continue with the next step in your analysis."})
        step_count += 1
        # Intermediate yield: only `steps` carries information at this point.
        yield steps, None, None

    # Produce the final answer.
    messages.append({"role": "user",
                     "content": "Please provide a comprehensive final answer based on your reasoning above, summarizing key points and addressing any uncertainties. USE JSON Formate"})
    start_time = time.time()
    final_data = make_api_call(messages, 4096, temperature=temperature, is_final_answer=True, model=model)
    thinking_time = time.time() - start_time
    total_thinking_time += thinking_time

    # Accept either key for the answer text; otherwise show the raw JSON.
    if 'content' in final_data:
        final_content = final_data["content"]
    elif 'final_answer' in final_data:
        final_content = final_data["final_answer"]
    else:
        final_content = json.dumps(final_data)

    logger.info(f"最终答案已生成。思考时间: {thinking_time:.2f} 秒")
    steps.append(("最终答案", final_content, thinking_time))
    logger.info(f"总思考时间: {total_thinking_time:.2f} 秒")
    full_response = {"steps": steps, "total_thinking_time": total_thinking_time}
    yield steps, total_thinking_time, full_response
def get_binary_file_downloader_html(bin_file, file_label="文件"):
    """Return an HTML anchor that downloads ``bin_file`` through a base64 data URI.

    Args:
        bin_file: Path of the file to embed; it is read in binary mode.
        file_label: Text shown on the link after the download prefix.

    Returns:
        An HTML snippet whose ``download`` attribute is the file's base name.
    """
    with open(bin_file, "rb") as handle:
        encoded = base64.b64encode(handle.read()).decode()
    download_name = os.path.basename(bin_file)
    return f"""
<a href="data:application/octet-stream;base64,{encoded}" download="{download_name}"
style="display: inline-block; padding: 0.5em 1em; color: white; background-color: #4CAF50; text-decoration: none; border-radius: 4px;">
📥 下载 {file_label}
</a>
"""
def main():
    """Render the Streamlit page and run the reasoning chain for a submitted query.

    The sidebar holds the model choice, the step limit and the temperature.
    The main area holds the query box and a send button. While
    ``generate_response`` yields, the step list is redrawn in place; on the
    last yield the total time and a JSON download link are shown.
    """
    st.set_page_config(page_title="g1 原型", page_icon="🧠", layout="wide")
    st.title("g1: 使用 LLM 创建类似 o1 的推理链")
    st.markdown(
        """
<style>
/* 新样式 */
h1, h2, h3 {
color: #1e3a8a;
}
/* 侧边栏样式调整 */
.css-1d391kg {
padding-top: 1rem;
padding-right: 0.5rem;
padding-left: 0.5rem;
}
.css-1d391kg .block-container {
padding-top: 1rem;
}
/* 调整侧边栏宽度 */
.css-1q1n0ol {
max-width: 14rem;
}
</style>
""",
        unsafe_allow_html=True,
    )
    st.markdown(
        """
这是一个早期原型,使用提示来创建类似 o1 的推理链以提高输出准确性。它并不完美,准确性尚未经过正式评估。
开源[代码库在此](https://github.com/Theigrams/g1)
"""
    )

    with st.sidebar:
        st.markdown("## 🛠️ 设置")
        st.markdown("<br>", unsafe_allow_html=True)  # Spacer
        st.markdown("### 🤖 模型设置")
        model_options = [
            "claude-3-5-sonnet-20240620", "claude-3-haiku-20240307",
            "gpt-4o", "gpt-4o-mini", "gpt-3.5-turbo",
            "gemini-1.5-pro", "gemini-1.5-flash", "gemini-1.5-flash-8b-exp-0924",
            "qwen2-72b-instruct", "qwen2.5-72b-instruct",
            "llama-3.1-70b-versatile",
        ]
        model = st.selectbox("选择模型", model_options)
        st.markdown("<br>", unsafe_allow_html=True)  # Spacer
        st.markdown("### ⚙️ 生成设置")
        max_steps = st.slider("最大步骤数", 3, 32, 10)
        temperature = st.slider("温度", 0.0, 1.0, 0.2, 0.1)

    # Query text box and send button, laid out side by side (5:1).
    st.markdown("### 🔍 输入您的查询")
    col1, col2 = st.columns([5, 1])
    with col1:
        # Streamlit discourages an empty label; give it a real one and keep it
        # hidden through label_visibility.
        user_query = st.text_input("查询", placeholder="例如:1.11 和 1.3 哪个大?",
                                   label_visibility="collapsed")
    with col2:
        send_button = st.button("发送")

    if send_button and user_query:
        with st.spinner("正在生成回答..."):
            # Placeholders that are overwritten on every yield.
            response_container = st.empty()
            time_container = st.empty()
            download_container = st.empty()

            for steps, total_thinking_time, full_response in generate_response(
                user_query, max_steps=max_steps, temperature=temperature, model=model
            ):
                with response_container.container():
                    for title, content, thinking_time in steps:
                        if title.startswith("最终答案"):
                            st.markdown(f"### 🎯 {title}")
                            st.info(content)
                        else:
                            with st.expander(f"🧠 {title} (思考时间: {thinking_time:.2f} 秒)", expanded=True):
                                # st.write rather than st.markdown, to avoid HTML-escaping issues.
                                st.write(content)

                # Only the last yield carries the total time and full response.
                if total_thinking_time is not None and full_response is not None:
                    time_container.markdown(f"⏱️ **总思考时间: {total_thinking_time:.2f} 秒**")
                    # Write the chain to a JSON file and offer it as a download.
                    # UTF-8 with ensure_ascii=False keeps non-ASCII text readable.
                    json_filename = "reasoning_chain.json"
                    with open(json_filename, "w", encoding="utf-8") as f:
                        json.dump(full_response, f, indent=2, ensure_ascii=False)
                    download_link = get_binary_file_downloader_html(json_filename, "完整推理链 JSON")
                    download_container.markdown(download_link, unsafe_allow_html=True)
# Run the app only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()