update

Jasson · Oct 3, 2024 · 279e22e · 279e22e
2 parents 8ee001b + 3cd615c
commit 279e22e
Show file tree

Hide file tree

Showing 29 changed files with 391 additions and 140 deletions.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -17,7 +17,7 @@ Please follow the [fork and pull request](https://docs.github.com/en/get-started
 1. Clone the repository.
 2. Create a virtual environment:
    - For Unix, use `./scripts/create_venv.sh`.
-   - For Windows, use `.\scripts\create_venv_win.bat`.
+   - For Windows, use `.\scripts\create_venv.bat`.
    - This setup will:
      - Create a `phienv` virtual environment in the current directory.
      - Install the required packages.

diff --git a/cookbook/assistants/cli.py b/cookbook/assistants/cli.py
@@ -1,5 +1,12 @@
 from phi.assistant import Assistant
 from phi.tools.duckduckgo import DuckDuckGo
 
-assistant = Assistant(tools=[DuckDuckGo()], show_tool_calls=True, read_chat_history=True)
+assistant = Assistant(
+    tools=[DuckDuckGo()],
+    show_tool_calls=True,
+    read_chat_history=True,
+    debug_mode=True,
+    add_chat_history_to_messages=True,
+    num_history_messages=3,
+)
 assistant.cli_app(markdown=True)
diff --git a/cookbook/assistants/examples/auto_rag/README.md b/cookbook/assistants/examples/auto_rag/README.md
@@ -57,7 +57,7 @@ streamlit run cookbook/examples/auto_rag/app.py
 ```
 
 - Open [localhost:8501](http://localhost:8501) to view your RAG app.
-- Add websites or PDFs and ask question.
+- Add websites or documents (PDF, DOCX, CSV, or TXT), then ask a question.
 
 - Example Website: https://techcrunch.com/2024/04/18/meta-releases-llama-3-claims-its-among-the-best-open-models-available/
 - Ask questions like:

diff --git a/cookbook/assistants/examples/auto_rag/app.py b/cookbook/assistants/examples/auto_rag/app.py
@@ -4,8 +4,11 @@
 import streamlit as st
 from phi.assistant import Assistant
 from phi.document import Document
-from phi.document.reader.pdf import PDFReader
 from phi.document.reader.website import WebsiteReader
+from phi.document.reader.pdf import PDFReader
+from phi.document.reader.text import TextReader
+from phi.document.reader.docx import DocxReader
+from phi.document.reader.csv_reader import CSVReader
 from phi.utils.log import logger
 
 from assistant import get_auto_rag_assistant  # type: ignore
@@ -117,13 +120,22 @@ def main() -> None:
             st.session_state["file_uploader_key"] = 100
 
         uploaded_file = st.sidebar.file_uploader(
-            "Add a PDF :page_facing_up:", type="pdf", key=st.session_state["file_uploader_key"]
+            "Add a Document (.pdf, .csv, .txt, or .docx) :page_facing_up:", key=st.session_state["file_uploader_key"]
         )
         if uploaded_file is not None:
-            alert = st.sidebar.info("Processing PDF...", icon="🧠")
+            alert = st.sidebar.info("Processing document...", icon="🧠")
             auto_rag_name = uploaded_file.name.split(".")[0]
             if f"{auto_rag_name}_uploaded" not in st.session_state:
-                reader = PDFReader()
+                file_type = uploaded_file.name.split(".")[-1].lower()
+
+                if file_type == "pdf":
+                    reader = PDFReader()
+                elif file_type == "csv":
+                    reader = CSVReader()
+                elif file_type == "txt":
+                    reader = TextReader()
+                elif file_type == "docx":
+                    reader = DocxReader()
                 auto_rag_documents: List[Document] = reader.read(uploaded_file)
                 if auto_rag_documents:
                     auto_rag_assistant.knowledge_base.load_documents(auto_rag_documents, upsert=True)

diff --git a/cookbook/assistants/llms/claude/prompt_caching.py b/cookbook/assistants/llms/claude/prompt_caching.py
@@ -0,0 +1,46 @@
+# Inspired by: https://github.com/anthropics/anthropic-cookbook/blob/main/misc/prompt_caching.ipynb
+import requests
+from bs4 import BeautifulSoup
+
+from phi.assistant import Assistant
+from phi.llm.anthropic import Claude
+
+
+def fetch_article_content(url):
+    response = requests.get(url)
+    soup = BeautifulSoup(response.content, "html.parser")
+    # Remove script and style elements
+    for script in soup(["script", "style"]):
+        script.decompose()
+    # Get text
+    text = soup.get_text()
+    # Break into lines and remove leading and trailing space on each
+    lines = (line.strip() for line in text.splitlines())
+    # Break multi-headlines into a line each
+    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
+    # Drop blank lines
+    text = "\n".join(chunk for chunk in chunks if chunk)
+    return text
+
+
+# Fetch the content of the book (plain-text Project Gutenberg edition)
+book_url = "https://www.gutenberg.org/cache/epub/1342/pg1342.txt"
+book_content = fetch_article_content(book_url)
+
+print(f"Fetched {len(book_content)} characters from the book.")
+
+assistant = Assistant(
+    llm=Claude(
+        model="claude-3-5-sonnet-20240620",
+        cache_system_prompt=True,
+    ),
+    system_prompt=book_content[:10000],
+    debug_mode=True,
+)
+assistant.print_response("Give me a one line summary of this book", markdown=True, stream=True)
+print("Prompt cache creation tokens: ", assistant.llm.metrics["cache_creation_tokens"])  # type: ignore
+print("Prompt cache read tokens: ", assistant.llm.metrics["cache_read_tokens"])  # type: ignore
+
+# assistant.print_response("Give me a one line summary of this book", markdown=True, stream=False)
+# print("Prompt cache creation tokens: ", assistant.llm.metrics["cache_creation_tokens"])
+# print("Prompt cache read tokens: ", assistant.llm.metrics["cache_read_tokens"])
diff --git a/cookbook/assistants/llms/deepseek/README.md b/cookbook/assistants/llms/deepseek/README.md
@@ -0,0 +1,34 @@
+## DeepSeek
+
+> Note: Fork and clone this repository if needed
+
+1. Create a virtual environment
+
+```shell
+python3 -m venv venv
+source venv/bin/activate
+```
+
+2. Install libraries
+
+```shell
+pip install -U openai phidata
+```
+
+3. Export `DEEPSEEK_API_KEY`
+
+```shell
+export DEEPSEEK_API_KEY=***
+```
+
+4. Test structured output
+
+```shell
+python cookbook/assistants/llms/deepseek/pydantic_output.py
+```
+
+5. Test function calling
+
+```shell
+python cookbook/assistants/llms/deepseek/tool_call.py
+```
diff --git a/cookbook/assistants/llms/deepseek/pydantic_output.py b/cookbook/assistants/llms/deepseek/pydantic_output.py
@@ -0,0 +1,20 @@
+from phi.assistant import Assistant
+from phi.llm.deepseek import DeepSeekChat
+from phi.tools.yfinance import YFinanceTools
+from pydantic import BaseModel, Field
+
+
+class StockPrice(BaseModel):
+    ticker: str = Field(..., example="NVDA")
+    price: float = Field(..., example=100.0)
+    currency: str = Field(..., example="USD")
+
+
+assistant = Assistant(
+    llm=DeepSeekChat(),
+    tools=[YFinanceTools(stock_price=True, analyst_recommendations=True, company_info=True, company_news=True)],
+    show_tool_calls=True,
+    markdown=True,
+    output_model=StockPrice,
+)
+assistant.print_response("Write a comparison between NVDA and AMD.")
diff --git a/cookbook/assistants/llms/deepseek/tool_call.py b/cookbook/assistants/llms/deepseek/tool_call.py
@@ -0,0 +1,11 @@
+from phi.assistant import Assistant
+from phi.llm.deepseek import DeepSeekChat
+from phi.tools.yfinance import YFinanceTools
+
+assistant = Assistant(
+    llm=DeepSeekChat(),
+    tools=[YFinanceTools(stock_price=True, analyst_recommendations=True, company_info=True, company_news=True)],
+    show_tool_calls=True,
+    markdown=True,
+)
+assistant.print_response("Write a comparison between NVDA and AMD, use all tools available.")
diff --git a/cookbook/assistants/llms/openai/auto_rag/requirements.in b/cookbook/assistants/llms/openai/auto_rag/requirements.in
@@ -9,3 +9,6 @@ streamlit
 bs4
 duckduckgo-search
 nest_asyncio
+textract==1.6.3
+python-docx
+lxml
diff --git a/cookbook/assistants/tools/firecrawl_tools.py b/cookbook/assistants/tools/firecrawl_tools.py
@@ -1,5 +1,13 @@
+# pip install firecrawl-py openai
+
+import os
+
 from phi.assistant import Assistant
 from phi.tools.firecrawl import FirecrawlTools
 
-assistant = Assistant(tools=[FirecrawlTools()], show_tool_calls=True, markdown=True)
-assistant.print_response("Tell me about https://github.com/phidatahq/phidata")
+api_key = os.getenv("FIRECRAWL_API_KEY")
+
+assistant = Assistant(
+    tools=[FirecrawlTools(api_key=api_key, scrape=False, crawl=True)], show_tool_calls=True, markdown=True
+)
+assistant.print_response("summarize this https://finance.yahoo.com/")
diff --git a/phi/assistant/assistant.py b/phi/assistant/assistant.py
@@ -31,7 +31,7 @@
 from phi.storage.assistant import AssistantStorage
 from phi.utils.format_str import remove_indent
 from phi.tools import Tool, Toolkit, Function
-from phi.utils.log import logger, set_log_level_to_debug
+from phi.utils.log import logger, set_log_level_to_debug, set_log_level_to_info
 from phi.utils.message import get_text_from_message
 from phi.utils.merge_dict import merge_dictionaries
 from phi.utils.timer import Timer
@@ -209,6 +209,10 @@ def set_log_level(cls, v: bool) -> bool:
         if v:
             set_log_level_to_debug()
             logger.debug("Debug logs enabled")
+        else:
+            set_log_level_to_info()
+            logger.info("Debug logs disabled")
+
         return v
 
     @field_validator("run_id", mode="before")
@@ -848,7 +852,9 @@ def _run(
 
         # -*- Add chat history to the messages list
         if self.add_chat_history_to_messages:
-            llm_messages += self.memory.get_last_n_messages(last_n=self.num_history_messages)
+            llm_messages += self.memory.get_last_n_messages_starting_from_the_user_message(
+                last_n=self.num_history_messages
+            )
 
         # -*- Build the User prompt
         # References to add to the user_prompt if add_references_to_prompt is True
@@ -1055,7 +1061,9 @@ async def _arun(
         # -*- Add chat history to the messages list
         if self.add_chat_history_to_messages:
             if self.memory is not None:
-                llm_messages += self.memory.get_last_n_messages(last_n=self.num_history_messages)
+                llm_messages += self.memory.get_last_n_messages_starting_from_the_user_message(
+                    last_n=self.num_history_messages
+                )
 
         # -*- Build the User prompt
         # References to add to the user_prompt if add_references_to_prompt is True

diff --git a/phi/document/reader/csv_reader.py b/phi/document/reader/csv_reader.py
@@ -1,30 +1,37 @@
 import csv
 from pathlib import Path
-from typing import List
-
+from typing import List, Union, IO, Any
 from phi.document.base import Document
 from phi.document.reader.base import Reader
 from phi.utils.log import logger
+import io
 
 
 class CSVReader(Reader):
     """Reader for CSV files"""
 
-    def read(self, path: Path, delimiter: str = " ", quotechar: str = "|") -> List[Document]:
-        if not path:
-            raise ValueError("No path provided")
-
-        if not path.exists():
-            raise FileNotFoundError(f"Could not find file: {path}")
+    def read(self, file: Union[Path, IO[Any]], delimiter: str = ",", quotechar: str = '"') -> List[Document]:
+        if not file:
+            raise ValueError("No file provided")
 
         try:
-            logger.info(f"Reading: {path}")
-            csv_name = path.name.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
+            if isinstance(file, Path):
+                if not file.exists():
+                    raise FileNotFoundError(f"Could not find file: {file}")
+                logger.info(f"Reading: {file}")
+                file_content = file.open(newline="", mode="r", encoding="utf-8")
+            else:
+                logger.info(f"Reading uploaded file: {file.name}")
+                file.seek(0)
+                file_content = io.StringIO(file.read().decode("utf-8"))
+
+            csv_name = Path(file.name).stem if isinstance(file, Path) else file.name.split(".")[0]
             csv_content = ""
-            with open(path, newline="") as csvfile:
+            with file_content as csvfile:
                 csv_reader = csv.reader(csvfile, delimiter=delimiter, quotechar=quotechar)
                 for row in csv_reader:
-                    csv_content += ", ".join(row)
+                    csv_content += ", ".join(row) + "\n"
+
             documents = [
                 Document(
                     name=csv_name,
@@ -39,5 +46,5 @@ def read(self, path: Path, delimiter: str = " ", quotechar: str = "|") -> List[D
                 return chunked_documents
             return documents
         except Exception as e:
-            logger.error(f"Error reading: {path}: {e}")
-        return []
+            logger.error(f"Error reading: {file.name if isinstance(file, IO) else file}: {e}")
+            return []
diff --git a/phi/document/reader/docx.py b/phi/document/reader/docx.py
@@ -1,35 +1,36 @@
 from pathlib import Path
-from typing import List
-
+from typing import List, Union
 from phi.document.base import Document
 from phi.document.reader.base import Reader
 from phi.utils.log import logger
+import io
+from docx import Document as DocxDocument
 
 
 class DocxReader(Reader):
     """Reader for Doc/Docx files"""
 
-    def read(self, path: Path) -> List[Document]:
-        if not path:
-            raise ValueError("No path provided")
-
-        if not path.exists():
-            raise FileNotFoundError(f"Could not find file: {path}")
+    def read(self, file: Union[Path, io.BytesIO]) -> List[Document]:
+        if not file:
+            raise ValueError("No file provided")
 
         try:
-            import textract  # noqa: F401
-        except ImportError:
-            raise ImportError("`textract` not installed")
+            if isinstance(file, Path):
+                logger.info(f"Reading: {file}")
+                docx_document = DocxDocument(file)
+                doc_name = file.stem
+            else:  # Handle file-like object from upload
+                logger.info(f"Reading uploaded file: {file.name}")
+                docx_document = DocxDocument(file)
+                doc_name = file.name.split(".")[0]
+
+            doc_content = "\n\n".join([para.text for para in docx_document.paragraphs])
 
-        try:
-            logger.info(f"Reading: {path}")
-            doc_name = path.name.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
-            doc_content = textract.process(path)
             documents = [
                 Document(
                     name=doc_name,
                     id=doc_name,
-                    content=doc_content.decode("utf-8"),
+                    content=doc_content,
                 )
             ]
             if self.chunk:
@@ -39,5 +40,5 @@ def read(self, path: Path) -> List[Document]:
                 return chunked_documents
             return documents
         except Exception as e:
-            logger.error(f"Error reading: {path}: {e}")
-        return []
+            logger.error(f"Error reading file: {e}")
+            return []