Skip to content

Commit

Permalink
update spider
Browse files Browse the repository at this point in the history
  • Loading branch information
yu-shaonian committed Dec 29, 2023
1 parent 0d730db commit c7991c1
Show file tree
Hide file tree
Showing 7 changed files with 377 additions and 277 deletions.
3 changes: 2 additions & 1 deletion CompanyConfig/Default/ChatChainConfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@
"clear_structure": "True",
"gui_design": "True",
"git_management": "False",
"web_spider": "False",
"self_improve": "False",
"incremental_develop": "False",
"background_prompt": "ChatDev is a software company powered by multiple intelligent agents, such as chief executive officer, chief human resources officer, chief product officer, chief technology officer, etc, with a multi-agent organizational structure and the mission of 'changing the digital world through programming'."
}
}
551 changes: 276 additions & 275 deletions CompanyConfig/Default/PhaseConfig.json

Large diffs are not rendered by default.

89 changes: 89 additions & 0 deletions camel/web_spider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import requests
from bs4 import BeautifulSoup
import openai
from openai import OpenAI
import wikipediaapi
import os
import time

# Credentials and the optional endpoint override are read from the environment.
self_api_key = os.environ.get('OPENAI_API_KEY')
BASE_URL = os.environ.get('BASE_URL')

# Build the constructor arguments once; base_url is only supplied when the
# BASE_URL environment variable holds a non-empty value, otherwise the client
# is created from the API key alone.
_client_options = {"api_key": self_api_key}
if BASE_URL:
    _client_options["base_url"] = BASE_URL

# Shared client used by the functions in this module.
client = openai.OpenAI(**_client_options)

def get_baidu_baike_content(keyword, timeout=10):
    """Fetch the description text of a Baidu Baike entry.

    Requests ``https://baike.baidu.com/item/<keyword>`` and returns the
    ``content`` attribute of a ``<meta>`` tag in the returned HTML.

    Args:
        keyword: Title of the entry to look up. It is percent-encoded before
            being placed in the URL path.
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests`` may block indefinitely on an unresponsive server.

    Returns:
        The ``content`` attribute of the page's description meta tag.

    Raises:
        requests.RequestException: If the request fails or times out.
        IndexError, KeyError, AttributeError: If no description meta tag is
            found and the positional fallback does not match the page layout.
    """
    # Imported locally so this function does not rely on a file-level import.
    from urllib.parse import quote

    # Encode the keyword so characters such as '/', '?' or '#' cannot alter
    # the structure of the request URL.
    encoded_keyword = quote(str(keyword), safe='')
    url = f'https://baike.baidu.com/item/{encoded_keyword}'
    response = requests.get(url, timeout=timeout)

    soup = BeautifulSoup(response.content, 'html.parser')

    # Look the tag up by name instead of by its position in the document, so
    # an extra or reordered <head> child does not break the extraction.
    # NOTE(review): presumably the positional lookup below was targeting this
    # same <meta name="description"> tag -- confirm against a live page.
    description_tag = soup.find('meta', attrs={'name': 'description'})
    if description_tag is not None and description_tag.get('content'):
        return description_tag['content']

    # Fallback: the original positional lookup (last top-level node, its
    # first child, that child's fifth child).
    return soup.contents[-1].contents[0].contents[4].attrs['content']


def get_wiki_content(keyword):
    """Return the English Wikipedia summary for *keyword*.

    The page title and summary are printed when the page exists; otherwise
    "Page not found." is printed. In both cases the page object's ``summary``
    attribute is returned.

    Args:
        keyword: Title of the Wikipedia page to look up.

    Returns:
        The ``summary`` attribute of the looked-up page.
    """
    # NOTE(review): the user-agent string looks like a placeholder -- confirm
    # the intended project name and contact address.
    wiki = wikipediaapi.Wikipedia('MyProjectName ([email protected])', 'en')
    page = wiki.page(keyword)

    if not page.exists():
        print("Page not found.")
    else:
        print("Page - Title:", page.title)
        print("Page - Summary:", page.summary)

    return page.summary



def _ask_model(prompt):
    """Send *prompt* as a single user message and return the reply text."""
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="gpt-3.5-turbo-16k",
        temperature=0.2,
        top_p=1.0,
        n=1,
        stream=False,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        logit_bias={})
    return response.choices[0].message.content


def modal_trans(task_dsp):
    """Build a short background description for a task from Wikipedia.

    The model is first asked for the single most important keyword of
    *task_dsp*; that keyword is looked up with ``get_wiki_content`` and the
    resulting text is sent back to the model to be summarised.

    This is a best-effort step: any ordinary failure along the way is
    reported on stdout and an empty string is returned instead of raising.

    Args:
        task_dsp: The raw task description.

    Returns:
        The model's summary of the looked-up content, or ``''`` when no
        content was found or any step failed.
    """
    try:
        keyword = _ask_model(
            "'" + task_dsp +
            "'Just give me the most important keyword about this sentence without explaining it and your answer should be only one keyword.")
        # Surrounding whitespace or quotes in the reply would make the page
        # title lookup miss.
        keyword = keyword.strip().strip("\"'")

        spider_content = get_wiki_content(keyword)
        if not spider_content:
            # Nothing was retrieved; asking the model to summarise an empty
            # paragraph would only produce a meaningless description.
            print("the content is none")
            return ''

        result = _ask_model(
            "'" + spider_content +
            "',Summarize this paragraph and return the key information.")
        print("web spider content:", result)
    except Exception as err:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit still propagate, and report the
        # cause instead of discarding it.
        result = ''
        print("the content is none")
        print("web spider error:", repr(err))
    return result
4 changes: 4 additions & 0 deletions chatdev/chat_chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from camel.typing import TaskType, ModelType
from chatdev.chat_env import ChatEnv, ChatEnvConfig
from chatdev.statistics import get_info
from camel.web_spider import modal_trans
from chatdev.utils import log_visualize, now


Expand Down Expand Up @@ -59,6 +60,7 @@ def __init__(self,
# init chatchain config and recruitments
self.chain = self.config["chain"]
self.recruitments = self.config["recruitments"]
self.web_spider = self.config["web_spider"]

# init default max chat turn
self.chat_turn_limit_default = 10
Expand Down Expand Up @@ -243,6 +245,8 @@ def pre_processing(self):
self.chat_env.env_dict['task_prompt'] = self.self_task_improve(self.task_prompt_raw)
else:
self.chat_env.env_dict['task_prompt'] = self.task_prompt_raw
if(check_bool(self.web_spider)):
self.chat_env.env_dict['task_description'] = modal_trans(self.task_prompt_raw)

def post_processing(self):
"""
Expand Down
1 change: 1 addition & 0 deletions chatdev/chat_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def __init__(self, chat_env_config: ChatEnvConfig):
self.env_dict = {
"directory": "",
"task_prompt": "",
"task_description":"",
"modality": "",
"ideas": "",
"language": "",
Expand Down
3 changes: 3 additions & 0 deletions chatdev/phase.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ def __init__(self, **kwargs):

def update_phase_env(self, chat_env):
    """Copy the task, description, modality and ideas into ``self.phase_env``.

    Args:
        chat_env: Environment object whose ``env_dict`` holds the
            ``task_prompt``, ``task_description``, ``modality`` and ``ideas``
            entries.
    """
    env_dict = chat_env.env_dict
    self.phase_env.update({
        "task": env_dict['task_prompt'],
        # Use the stored value; the earlier code passed the quoted expression
        # text itself, so the literal source string reached the phase
        # environment instead of the task description.
        "description": env_dict['task_description'],
        "modality": env_dict['modality'],
        "ideas": env_dict['ideas'],
    })

Expand All @@ -345,6 +346,7 @@ def update_phase_env(self, chat_env):
gui = "" if not chat_env.config.gui_design \
else "The software should be equipped with graphical user interface (GUI) so that user can visually and graphically use it; so you must choose a GUI framework (e.g., in Python, you can implement GUI via tkinter, Pygame, Flexx, PyGUI, etc,)."
self.phase_env.update({"task": chat_env.env_dict['task_prompt'],
"description": "chat_env.env_dict['task_description']",
"modality": chat_env.env_dict['modality'],
"ideas": chat_env.env_dict['ideas'],
"language": chat_env.env_dict['language'],
Expand All @@ -366,6 +368,7 @@ def __init__(self, **kwargs):

def update_phase_env(self, chat_env):
    """Replace ``self.phase_env`` with a fresh task/description/language/codes dict.

    Args:
        chat_env: Environment object providing ``env_dict`` (with the
            ``task_prompt``, ``task_description`` and ``language`` entries)
            and a ``get_codes()`` method.
    """
    env_dict = chat_env.env_dict
    # Pairs of (phase_env key, env_dict key), in the order they are stored.
    copied_fields = (
        ("task", 'task_prompt'),
        ("description", 'task_description'),
        ("language", 'language'),
    )
    snapshot = {target: env_dict[source] for target, source in copied_fields}
    snapshot["codes"] = chat_env.get_codes()
    self.phase_env = snapshot

Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ tiktoken==0.4.0
virtualenv==20.23.0
Werkzeug==2.3.6
Markdown==3.4.4
Pillow==10.1.0
Pillow==10.1.0
Wikipedia-API==0.6.0

0 comments on commit c7991c1

Please sign in to comment.