Skip to content

Commit

Permalink
fix parsing for non-html webpages such as .py
Browse files Browse the repository at this point in the history
  • Loading branch information
tuhahaha authored and JianxinMa committed Feb 28, 2024
1 parent e891ce6 commit 9c9af84
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 10 deletions.
4 changes: 2 additions & 2 deletions qwen_agent/memory/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
ROLE, USER, Message)
from qwen_agent.log import logger
from qwen_agent.prompts import GenKeyword
from qwen_agent.utils.utils import get_file_type, is_local_path
from qwen_agent.utils.utils import get_file_type


class Memory(Agent):
Expand Down Expand Up @@ -53,7 +53,7 @@ def _run(self,
for file in files:
if (file.split('.')[-1].lower() in [
'pdf', 'docx', 'pptx'
]) or (not is_local_path(file) and get_file_type(file) == 'html'):
]) or get_file_type(file) == 'html':
rag_files.append(file)

if not rag_files:
Expand Down
2 changes: 1 addition & 1 deletion qwen_server/add_qwen_libs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import sys
from pathlib import Path

# A temporary solution. We should use `python setup.py develop` in the future.
# This can be removed if qwen_agent is installed via `pip install -e ./`.
sys.path.insert(0, str(Path(__file__).absolute().parent.parent))
8 changes: 6 additions & 2 deletions qwen_server/assistant_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
import time
from pathlib import Path

import add_qwen_libs # NOQA
try:
import add_qwen_libs # NOQA
except ImportError:
pass

import gradio as gr
import jsonlines

Expand Down Expand Up @@ -105,7 +109,7 @@ def bot(history):


def init_chatbot():
time.sleep(0.5)
time.sleep(1)
page_url = set_url()
response = read_meta_data_by_condition(meta_file, url=page_url)
if not response:
Expand Down
18 changes: 16 additions & 2 deletions qwen_server/database_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
import os
from pathlib import Path

import add_qwen_libs # NOQA
try:
import add_qwen_libs # NOQA
except ImportError:
pass
import json5
import jsonlines
import uvicorn
Expand All @@ -14,7 +17,8 @@

from qwen_agent.log import logger
from qwen_agent.memory import Memory
from qwen_agent.utils.utils import get_local_ip
from qwen_agent.utils.utils import (get_basename_from_url, get_local_ip,
save_text_to_file)
from qwen_server.schema import GlobalConfig
from qwen_server.utils import (rm_browsing_meta_data, save_browsing_meta_data,
save_history)
Expand Down Expand Up @@ -57,6 +61,8 @@


def update_pop_url(url: str):
url = os.path.join(server_config.path.download_root,
get_basename_from_url(url))
new_line = {'url': url}

with jsonlines.open(cache_file_popup_url, mode='w') as writer:
Expand All @@ -76,6 +82,14 @@ def change_checkbox_state(key):

def cache_page(**kwargs):
url = kwargs.get('url', '')

page_content = kwargs.get('content', '')
if page_content:
# map to local url
url = os.path.join(server_config.path.download_root,
get_basename_from_url(url))
save_text_to_file(url, page_content)

save_browsing_meta_data(url, '[CACHING]', meta_file)
# rm history
save_history(None, url, history_dir)
Expand Down
7 changes: 4 additions & 3 deletions qwen_server/workstation_server.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
# need refactor

import datetime
import json
import os
from pathlib import Path

import add_qwen_libs # NOQA
import gradio as gr
import json5

try:
import add_qwen_libs # NOQA
except ImportError:
pass
from qwen_agent.agents import ArticleAgent, DocQAAgent, ReActChat
from qwen_agent.llm import get_chat_model
from qwen_agent.llm.base import ModelServiceError
Expand Down
50 changes: 50 additions & 0 deletions tests/qwen_server/test_database_server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import json
import os
import shutil
from pathlib import Path

from qwen_agent.utils.utils import get_basename_from_url
from qwen_server.schema import GlobalConfig
from qwen_server.utils import read_meta_data_by_condition


def test_database_server():
    """Check page caching and pop-up URL recording against a fresh workspace.

    The test loads ``qwen_server/server_config.json`` (located relative to
    this file), recreates the directories named in its ``path`` section, and
    then verifies that:

    * ``cache_page`` stores the supplied page content under the download
      root and records the resulting local path in ``meta_data.jsonl``;
    * ``update_pop_url`` produces ``popup_url.jsonl`` in the workspace root.
    """
    server_config_path = Path(__file__).resolve(
    ).parent.parent.parent / 'qwen_server/server_config.json'
    # JSON is UTF-8 by specification; do not rely on the locale default.
    with open(server_config_path, 'r', encoding='utf-8') as f:
        server_config = json.load(f)
    server_config = GlobalConfig(**server_config)
    paths = server_config.path

    # Start from a clean slate. Remove the *configured* workspace root (not a
    # hard-coded 'workspace') so the cleanup always matches what is created
    # below, even if the config points somewhere else.
    if os.path.exists(paths.work_space_root):
        shutil.rmtree(paths.work_space_root)
    # exist_ok=True: the configured directories may be nested inside (or equal
    # to) one another, in which case an earlier call has already created them.
    for directory in (paths.work_space_root, paths.database_root,
                      paths.download_root, paths.code_interpreter_ws):
        os.makedirs(directory, exist_ok=True)

    # cache
    # NOTE(review): imported here rather than at module top, presumably so the
    # workspace directories exist before the server module loads -- confirm.
    from qwen_server.database_server import cache_page, update_pop_url

    data = {
        'url':
            'https://github.com/QwenLM/Qwen-Agent',
        'content':
            '<p>Qwen-Agent is a framework for developing LLM applications based on the instruction following, tool usage, planning, and memory capabilities of Qwen. </p>'
    }
    cache_page(**data)

    # The page content must have been saved to a local file whose path is
    # derived from the download root and the URL's basename.
    new_url = os.path.join(paths.download_root,
                           get_basename_from_url(data['url']))
    assert os.path.exists(new_url)

    # The browsing metadata must reference the local path, not the web URL.
    meta_file = os.path.join(paths.work_space_root, 'meta_data.jsonl')
    assert os.path.exists(meta_file)
    res = read_meta_data_by_condition(meta_file, url=new_url)
    assert isinstance(res, dict)
    assert res['url'] == new_url

    # pop up
    update_pop_url(new_url)
    cache_file_popup_url = os.path.join(paths.work_space_root,
                                        'popup_url.jsonl')
    assert os.path.exists(cache_file_popup_url)

0 comments on commit 9c9af84

Please sign in to comment.