feat: save cost by cache index file
madawei2699 committed Mar 20, 2023
1 parent e902ef0 commit 466820d
Showing 6 changed files with 101 additions and 30 deletions.
6 changes: 6 additions & 0 deletions .env_sample
@@ -0,0 +1,6 @@
+export CF_ACCESS_CLIENT_ID='your_client_id'
+export CF_ACCESS_CLIENT_SECRET='your_client_secret'
+export PHANTOMJSCLOUD_API_KEY='your_api_key'
+export OPENAI_API_KEY='your_api_key'
+export SLACK_TOKEN='your_slack_token'
+export SLACK_SIGNING_SECRET='your_slack_signing_secret'
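These variables are read at startup via `os.environ.get` (see `app/gpt.py` below). A typical workflow, assuming a local copy named `.env`, is `cp .env_sample .env`, fill in the real values, then `source .env` before starting the server.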
3 changes: 2 additions & 1 deletion README.md
@@ -33,7 +33,8 @@ For now it is in development, but you can try it out by joining this [channel](http
 - Cost saving
   - [x] by caching the web page llama index
     - ~~Consider to use [sqlite-vss](https://github.com/asg017/sqlite-vss) to store and search the text embeddings~~
-    - Use [chromadb](https://github.com/chroma-core/chroma) to store and search the text embeddings
+    - ~~Use [chromadb](https://github.com/chroma-core/chroma) to store and search the text embeddings~~
+    - Use the llama index file to restore the index
   - [x] Consider using [sentence-transformers](https://github.com/UKPLab/sentence-transformers) or [txtai](https://github.com/neuml/txtai) to generate [embeddings](https://github.com/asg017/sqlite-vss/blob/main/examples/headlines/build/add_embeddings.py) (vectors)
     - Not as good as the OpenAI embeddings, so rolled back to the OpenAI embeddings; enabling the custom embeddings would also require at least 2GB of server memory, which still increases the cost.
   - [ ] Consider fine-tuning the chunk size of the index nodes and the prompt to save cost
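The cache key that makes this work is an MD5 digest over the sorted, concatenated URLs, so the same set of links in any order maps to the same index file. Here is a minimal sketch of the idea; the body of `get_unique_md5` is collapsed in the diff below, so the digest line is a plausible completion rather than verbatim source:

```python
import hashlib

def get_unique_md5(urls):
    # The same URL set, in any order, yields the same key,
    # and thus the same cached index file on disk.
    urls_str = ''.join(sorted(urls))
    return hashlib.md5(urls_str.encode('utf-8')).hexdigest()
```

In the diff below, this key becomes the index file name under `/tmp/myGPTReader/cache_web/`.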
57 changes: 32 additions & 25 deletions app/gpt.py
@@ -1,27 +1,24 @@
 
 import os
 import logging
 import hashlib
 import openai
-from llama_index import GPTChromaIndex, LLMPredictor, RssReader
+from llama_index import GPTSimpleVectorIndex, LLMPredictor, RssReader
 from llama_index.prompts.prompts import QuestionAnswerPrompt
 from llama_index.readers.schema.base import Document
-# from llama_index import LangchainEmbedding
-# from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 from langchain.chat_models import ChatOpenAI
-from chromadb.config import Settings
-import chromadb
 
 from app.fetch_web_post import get_urls, scrape_website, scrape_website_by_phantomjscloud
 
 OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
 openai.api_key = OPENAI_API_KEY
 
-chroma_client = chromadb.Client(Settings(
-    chroma_db_impl="duckdb+parquet",
-    persist_directory="/data/myGPTReader/chroma_db",
-))
-# embed_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"))
+llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.2, model_name="gpt-3.5-turbo"))
+
+index_cache_web_dir = '/tmp/myGPTReader/cache_web/'
+
+if not os.path.exists(index_cache_web_dir):
+    os.makedirs(index_cache_web_dir)
 
 def get_unique_md5(urls):
     urls_str = ''.join(sorted(urls))
@@ -45,15 +42,15 @@ def get_documents_from_urls(urls):
         documents.append(document)
     return documents
 
-def get_answer_from_chatGPT(messages, logger):
+def get_answer_from_chatGPT(messages):
     dialog_messages = format_dialog_messages(messages)
-    logger.info('=====> Use chatGPT to answer!')
-    logger.info(dialog_messages)
+    logging.info('=====> Use chatGPT to answer!')
+    logging.info(dialog_messages)
     completion = openai.ChatCompletion.create(
         model="gpt-3.5-turbo",
         messages=[{"role": "user", "content": dialog_messages}]
     )
-    logger.info(completion.usage)
+    logging.info(completion.usage)
     return completion.choices[0].message.content
 
 QUESTION_ANSWER_PROMPT_TMPL = (
@@ -65,16 +62,26 @@ def get_answer_from_chatGPT(messages, logger):
 )
 QUESTION_ANSWER_PROMPT = QuestionAnswerPrompt(QUESTION_ANSWER_PROMPT_TMPL)
 
-def get_answer_from_llama_web(messages, urls, logger):
+def get_index_from_web_cache(name):
+    if not os.path.exists(index_cache_web_dir + name):
+        return None
+    index = GPTSimpleVectorIndex.load_from_disk(index_cache_web_dir + name)
+    logging.info(f"=====> Get index from cache: {index_cache_web_dir + name}")
+    return index
+
+def get_answer_from_llama_web(messages, urls):
     dialog_messages = format_dialog_messages(messages)
-    logger.info('=====> Use llama with chatGPT to answer!')
-    logger.info(dialog_messages)
+    logging.info('=====> Use llama with chatGPT to answer!')
+    logging.info(dialog_messages)
     combained_urls = get_urls(urls)
-    logger.info(combained_urls)
-    documents = get_documents_from_urls(combained_urls)
-    llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.2, model_name="gpt-3.5-turbo"))
-    logger.info(documents)
-    chroma_collection = chroma_client.get_or_create_collection(get_unique_md5(urls))
-    index = GPTChromaIndex(documents, chroma_collection=chroma_collection)
-    # index = GPTChromaIndex(documents, chroma_collection=chroma_collection, embed_model=embed_model) # Not good, rollback to OpenAI
-    return index.query(dialog_messages, llm_predictor=llm_predictor, text_qa_template=QUESTION_ANSWER_PROMPT)
+    logging.info(combained_urls)
+    index_file_name = get_unique_md5(urls)
+    index = get_index_from_web_cache(index_file_name)
+    if index is None:
+        logging.info(f"=====> Build index from web!")
+        documents = get_documents_from_urls(combained_urls)
+        logging.info(documents)
+        index = GPTSimpleVectorIndex(documents)
+        logging.info(f"=====> Save index to disk path: {index_cache_web_dir + index_file_name}")
+        index.save_to_disk(index_cache_web_dir + index_file_name)
+    return index.query(dialog_messages, llm_predictor=llm_predictor, text_qa_template=QUESTION_ANSWER_PROMPT)
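Taken together, the new code is a simple disk-backed cache: an index is built, and its embeddings paid for, at most once per URL set, then reloaded from disk for later questions. A condensed sketch of the pattern, reusing the `GPTSimpleVectorIndex` calls from the diff above:

```python
import os
from llama_index import GPTSimpleVectorIndex

index_cache_web_dir = '/tmp/myGPTReader/cache_web/'

def load_or_build_index(name, documents):
    path = index_cache_web_dir + name
    if os.path.exists(path):
        # Cache hit: no new embedding calls, so no extra OpenAI cost.
        return GPTSimpleVectorIndex.load_from_disk(path)
    index = GPTSimpleVectorIndex(documents)  # embeddings are computed (and billed) here
    index.save_to_disk(path)
    return index
```

One trade-off worth noting: the cache now lives under `/tmp`, so it is lost on reboot, whereas the removed chromadb store persisted under `/data/myGPTReader/chroma_db`.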
6 changes: 3 additions & 3 deletions app/server.py
@@ -116,10 +116,10 @@ def handle_mentions(event, say, logger):
 
     # TODO: https://github.com/jerryjliu/llama_index/issues/778
     # if it can get the context_str, then put this prompt into the thread_message_history to provide more context to the chatGPT
-    if len(extract_urls_from_event(event)) > 0: # if this conversation has urls, use llama with all urls in this thread
-        future = executor.submit(get_answer_from_llama_web, thread_message_history[parent_thread_ts]['dialog_texts'], list(urls), logger)
+    if len(urls) > 0: # if this conversation has urls, use llama with all urls in this thread
+        future = executor.submit(get_answer_from_llama_web, thread_message_history[parent_thread_ts]['dialog_texts'], list(urls))
     else:
-        future = executor.submit(get_answer_from_chatGPT, thread_message_history[parent_thread_ts]['dialog_texts'], logger)
+        future = executor.submit(get_answer_from_chatGPT, thread_message_history[parent_thread_ts]['dialog_texts'])
 
     try:
         gpt_response = future.result(timeout=300)
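For context, the handler above runs both answer functions on a thread pool and bounds them with a five-minute timeout. A minimal sketch of that pattern; the pool construction, the stub function, and the fallback reply are assumptions, since none of them appear in this hunk:

```python
from concurrent.futures import ThreadPoolExecutor, TimeoutError

def get_answer_from_chatGPT(dialog_texts):
    ...  # stands in for the real function from app/gpt.py

executor = ThreadPoolExecutor(max_workers=4)  # pool size assumed; not shown in this diff

future = executor.submit(get_answer_from_chatGPT, ['hello'])
try:
    gpt_response = future.result(timeout=300)  # raise rather than block the Slack handler forever
except TimeoutError:
    gpt_response = 'Request timed out, please try again.'  # hypothetical fallback
```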
58 changes: 58 additions & 0 deletions docs/chatGPT/CDDR.md
@@ -2710,3 +2710,61 @@ print(html_content)
In this example, we first parse the RSS feed using the `feedparser` library. Then, we get the link for the latest update by accessing the `.link` attribute of the first item in the `entries` list of the feed object.

We then use the `requests` library to fetch the HTML content of the latest link. Finally, we can do whatever we need to with the HTML content (e.g., parse it using BeautifulSoup, extract data from it, etc.).

---

## 2023-03-20

---

You
---



This is my Python project layout:

```
.
├── LICENSE
├── Procfile
├── README.md
├── __pycache__
│   ├── server.cpython-310.pyc
│   └── slash_command.cpython-310.pyc
├── app
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-310.pyc
│   │   ├── fetch_web_post.cpython-310.pyc
│   │   ├── server.cpython-310.pyc
│   │   └── slash_command.cpython-310.pyc
│   ├── daily_hot_news.py
│   ├── data
│   │   ├── hot_news_rss.json
│   │   └── prompt.json
│   ├── fetch_web_post.py
│   ├── gpt.py
│   ├── server.py
│   └── slash_command.py
├── docs
│   └── chatGPT
│       └── CDDR.md
├── fly.toml
└── requirements.txt
```

But when I run `python app/gpt.py`, it shows this error:

```
File "/Users/dawei.ma/Workspace/personal/side_project/myGPTReader/app/gpt.py", line 11, in <module>
    from app.fetch_web_post import get_urls, scrape_website, scrape_website_by_phantomjscloud
ModuleNotFoundError: No module named 'app'
```

ChatGPT
-------

Based on the error message, it looks like Python is unable to find a module named 'app', which is where `fetch_web_post` resides. This might be due to the way you're running the `gpt.py` script.

One solution is to add an empty file named `__init__.py` in the root directory and in the `app` directory. This file signals to Python that these directories represent packages, which may contain modules.

Another solution could be to modify the import statements in your `gpt.py` file as follows:

```python
from fetch_web_post import get_urls, scrape_website, scrape_website_by_phantomjscloud
```

This should work if you're running the `gpt.py` script from the root directory of your project using the command `python app/gpt.py`.

You
---



But if I don't use the `app.xxx` prefix, then when I run `gunicorn app.server:app`, it says it cannot find the `daily_hot_news` module.

ChatGPT
-------

It looks like you might have an issue with importing modules correctly in your Python project.

Based on the error message that you see, it looks like `gpt.py` cannot find the `fetch_web_post` module because you're using a relative import from the `app` package instead of an absolute one.

To resolve this issue, try changing the import statement in `gpt.py` to use an absolute import that references the root level of your project:

```python
from myGPTReader.app.fetch_web_post import get_urls, scrape_website, scrape_website_by_phantomjscloud
```

This way, Python will always look for the `myGPTReader` package at the root level and can find all the packages inside.

Regarding the issue you're having when running `gunicorn app.server:app`, if you're receiving an error related to the `daily_hot_news` module not being found, make sure that the `daily_hot_news` module is actually in the `app` directory and that it has been imported correctly within the `server.py` module. If it's still not working, try adding the `app` directory to your Python path like so:

```shell
export PYTHONPATH="${PYTHONPATH}:/path/to/myGPTReader/app"
```

This should allow Python to find all the necessary modules even when you're running `gunicorn` from outside the `app` directory.
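One more option, not raised in the transcript above: running the script as a module from the project root with `python -m app.gpt` puts the project root on `sys.path`, so the `from app.fetch_web_post import ...` imports resolve without editing `PYTHONPATH` or changing any import statements.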
1 change: 0 additions & 1 deletion requirements.txt
@@ -12,5 +12,4 @@ requests==2.28.2
 html2text==2020.1.16
 feedparser==6.0.10
 validators==0.20.0
-chromadb==0.3.11
 Flask-APScheduler==1.12.4
