Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
ashpreetbedi committed Oct 3, 2024
2 parents 8ee001b + 3cd615c commit 279e22e
Show file tree
Hide file tree
Showing 29 changed files with 391 additions and 140 deletions.
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Please follow the [fork and pull request](https://docs.github.com/en/get-started
1. Clone the repository.
2. Create a virtual environment:
- For Unix, use `./scripts/create_venv.sh`.
- For Windows, use `.\scripts\create_venv_win.bat`.
- For Windows, use `.\scripts\create_venv.bat`.
- This setup will:
- Create a `phienv` virtual environment in the current directory.
- Install the required packages.
Expand Down
9 changes: 8 additions & 1 deletion cookbook/assistants/cli.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
from phi.assistant import Assistant
from phi.tools.duckduckgo import DuckDuckGo

assistant = Assistant(tools=[DuckDuckGo()], show_tool_calls=True, read_chat_history=True)
assistant = Assistant(
tools=[DuckDuckGo()],
show_tool_calls=True,
read_chat_history=True,
debug_mode=True,
add_chat_history_to_messages=True,
num_history_messages=3,
)
assistant.cli_app(markdown=True)
2 changes: 1 addition & 1 deletion cookbook/assistants/examples/auto_rag/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ streamlit run cookbook/examples/auto_rag/app.py
```

- Open [localhost:8501](http://localhost:8501) to view your RAG app.
- Add websites or PDFs and ask question.
- Add websites, docx, csv, txt, and PDFs then ask a question.

- Example Website: https://techcrunch.com/2024/04/18/meta-releases-llama-3-claims-its-among-the-best-open-models-available/
- Ask questions like:
Expand Down
20 changes: 16 additions & 4 deletions cookbook/assistants/examples/auto_rag/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@
import streamlit as st
from phi.assistant import Assistant
from phi.document import Document
from phi.document.reader.pdf import PDFReader
from phi.document.reader.website import WebsiteReader
from phi.document.reader.pdf import PDFReader
from phi.document.reader.text import TextReader
from phi.document.reader.docx import DocxReader
from phi.document.reader.csv_reader import CSVReader
from phi.utils.log import logger

from assistant import get_auto_rag_assistant # type: ignore
Expand Down Expand Up @@ -117,13 +120,22 @@ def main() -> None:
st.session_state["file_uploader_key"] = 100

uploaded_file = st.sidebar.file_uploader(
"Add a PDF :page_facing_up:", type="pdf", key=st.session_state["file_uploader_key"]
"Add a Document (.pdf, .csv, .txt, or .docx) :page_facing_up:", key=st.session_state["file_uploader_key"]
)
if uploaded_file is not None:
alert = st.sidebar.info("Processing PDF...", icon="🧠")
alert = st.sidebar.info("Processing document...", icon="🧠")
auto_rag_name = uploaded_file.name.split(".")[0]
if f"{auto_rag_name}_uploaded" not in st.session_state:
reader = PDFReader()
file_type = uploaded_file.name.split(".")[-1].lower()

if file_type == "pdf":
reader = PDFReader()
elif file_type == "csv":
reader = CSVReader()
elif file_type == "txt":
reader = TextReader()
elif file_type == "docx":
reader = DocxReader()
auto_rag_documents: List[Document] = reader.read(uploaded_file)
if auto_rag_documents:
auto_rag_assistant.knowledge_base.load_documents(auto_rag_documents, upsert=True)
Expand Down
46 changes: 46 additions & 0 deletions cookbook/assistants/llms/claude/prompt_caching.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Inspired by: https://github.com/anthropics/anthropic-cookbook/blob/main/misc/prompt_caching.ipynb
import requests
from bs4 import BeautifulSoup

from phi.assistant import Assistant
from phi.llm.anthropic import Claude


def fetch_article_content(url: str, timeout: float = 30.0) -> str:
    """Download *url* and return its visible text with blank lines removed.

    Args:
        url: Address of the page or text file to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on an unresponsive host.

    Returns:
        The text content, one non-empty chunk per line.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of silently parsing an HTTP error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")
    # Script and style elements carry no readable text; drop them.
    for tag in soup(["script", "style"]):
        tag.decompose()

    text = soup.get_text()
    # Strip leading and trailing whitespace from every line.
    lines = (line.strip() for line in text.splitlines())
    # Break multi-headlines (phrases separated by a double space) into a line
    # each. Splitting on a single space would put every word on its own line.
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
    # Drop blank chunks and rejoin.
    return "\n".join(chunk for chunk in chunks if chunk)


# Fetch the text of the book that is used below as the system prompt
book_url = "https://www.gutenberg.org/cache/epub/1342/pg1342.txt"
book_content = fetch_article_content(book_url)

print(f"Fetched {len(book_content)} characters from the book.")

# Claude assistant with `cache_system_prompt=True`; only the first 10,000
# characters of the fetched text are passed as the system prompt.
assistant = Assistant(
llm=Claude(
model="claude-3-5-sonnet-20240620",
cache_system_prompt=True,
),
system_prompt=book_content[:10000],
debug_mode=True,
)
assistant.print_response("Give me a one line summary of this book", markdown=True, stream=True)
# Print the cache counters stored in the LLM's metrics dict after the call
print("Prompt cache creation tokens: ", assistant.llm.metrics["cache_creation_tokens"]) # type: ignore
print("Prompt cache read tokens: ", assistant.llm.metrics["cache_read_tokens"]) # type: ignore

# Disabled non-streaming variant of the same request and metric printout
# (presumably for comparing cache counters on a repeated call; confirm before enabling):
# assistant.print_response("Give me a one line summary of this book", markdown=True, stream=False)
# print("Prompt cache creation tokens: ", assistant.llm.metrics["cache_creation_tokens"])
# print("Prompt cache read tokens: ", assistant.llm.metrics["cache_read_tokens"])
34 changes: 34 additions & 0 deletions cookbook/assistants/llms/deepseek/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
## DeepSeek

> Note: Fork and clone this repository if needed
1. Create a virtual environment

```shell
python3 -m venv venv
source venv/bin/activate
```

2. Install libraries

```shell
pip install -U openai phidata
```

3. Export `DEEPSEEK_API_KEY`

```shell
export DEEPSEEK_API_KEY=***
```

4. Test Structured output

```shell
python cookbook/assistants/llms/deepseek/pydantic_output.py
```

5. Test function calling

```shell
python cookbook/assistants/llms/deepseek/tool_call.py
```
20 changes: 20 additions & 0 deletions cookbook/assistants/llms/deepseek/pydantic_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from phi.assistant import Assistant
from phi.llm.deepseek import DeepSeekChat
from phi.tools.yfinance import YFinanceTools
from pydantic import BaseModel, Field


class StockPrice(BaseModel):
    """Structured response schema passed to the assistant as ``output_model``."""

    # Example values are attached through `json_schema_extra`: passing arbitrary
    # keyword arguments such as `example=` to `Field` is deprecated in Pydantic v2.
    ticker: str = Field(..., json_schema_extra={"example": "NVDA"})
    price: float = Field(..., json_schema_extra={"example": 100.0})
    currency: str = Field(..., json_schema_extra={"example": "USD"})


# DeepSeek-backed assistant with YFinance tools enabled; `output_model` asks
# for the response in the `StockPrice` shape.
# NOTE(review): the prompt below requests a comparison of two tickers while
# `StockPrice` holds a single ticker/price/currency - confirm this is intended.
assistant = Assistant(
llm=DeepSeekChat(),
tools=[YFinanceTools(stock_price=True, analyst_recommendations=True, company_info=True, company_news=True)],
show_tool_calls=True,
markdown=True,
output_model=StockPrice,
)
assistant.print_response("Write a comparison between NVDA and AMD.")
11 changes: 11 additions & 0 deletions cookbook/assistants/llms/deepseek/tool_call.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from phi.assistant import Assistant
from phi.llm.deepseek import DeepSeekChat
from phi.tools.yfinance import YFinanceTools

# DeepSeek-backed assistant with four YFinance tool groups enabled (stock price,
# analyst recommendations, company info, company news); tool calls are shown
# in the output.
assistant = Assistant(
llm=DeepSeekChat(),
tools=[YFinanceTools(stock_price=True, analyst_recommendations=True, company_info=True, company_news=True)],
show_tool_calls=True,
markdown=True,
)
# The prompt explicitly asks the model to use every available tool
assistant.print_response("Write a comparison between NVDA and AMD, use all tools available.")
3 changes: 3 additions & 0 deletions cookbook/assistants/llms/openai/auto_rag/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@ streamlit
bs4
duckduckgo-search
nest_asyncio
textract==1.6.3
python-docx
lxml
12 changes: 10 additions & 2 deletions cookbook/assistants/tools/firecrawl_tools.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# pip install firecrawl-py openai

import os

from phi.assistant import Assistant
from phi.tools.firecrawl import FirecrawlTools

assistant = Assistant(tools=[FirecrawlTools()], show_tool_calls=True, markdown=True)
assistant.print_response("Tell me about https://github.com/phidatahq/phidata")
api_key = os.getenv("FIRECRAWL_API_KEY")

assistant = Assistant(
tools=[FirecrawlTools(api_key=api_key, scrape=False, crawl=True)], show_tool_calls=True, markdown=True
)
assistant.print_response("summarize this https://finance.yahoo.com/")
14 changes: 11 additions & 3 deletions phi/assistant/assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from phi.storage.assistant import AssistantStorage
from phi.utils.format_str import remove_indent
from phi.tools import Tool, Toolkit, Function
from phi.utils.log import logger, set_log_level_to_debug
from phi.utils.log import logger, set_log_level_to_debug, set_log_level_to_info
from phi.utils.message import get_text_from_message
from phi.utils.merge_dict import merge_dictionaries
from phi.utils.timer import Timer
Expand Down Expand Up @@ -209,6 +209,10 @@ def set_log_level(cls, v: bool) -> bool:
if v:
set_log_level_to_debug()
logger.debug("Debug logs enabled")
else:
set_log_level_to_info()
logger.info("Debug logs disabled")

return v

@field_validator("run_id", mode="before")
Expand Down Expand Up @@ -848,7 +852,9 @@ def _run(

# -*- Add chat history to the messages list
if self.add_chat_history_to_messages:
llm_messages += self.memory.get_last_n_messages(last_n=self.num_history_messages)
llm_messages += self.memory.get_last_n_messages_starting_from_the_user_message(
last_n=self.num_history_messages
)

# -*- Build the User prompt
# References to add to the user_prompt if add_references_to_prompt is True
Expand Down Expand Up @@ -1055,7 +1061,9 @@ async def _arun(
# -*- Add chat history to the messages list
if self.add_chat_history_to_messages:
if self.memory is not None:
llm_messages += self.memory.get_last_n_messages(last_n=self.num_history_messages)
llm_messages += self.memory.get_last_n_messages_starting_from_the_user_message(
last_n=self.num_history_messages
)

# -*- Build the User prompt
# References to add to the user_prompt if add_references_to_prompt is True
Expand Down
35 changes: 21 additions & 14 deletions phi/document/reader/csv_reader.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,37 @@
import csv
from pathlib import Path
from typing import List

from typing import List, Union, IO, Any
from phi.document.base import Document
from phi.document.reader.base import Reader
from phi.utils.log import logger
import io


class CSVReader(Reader):
"""Reader for CSV files"""

def read(self, path: Path, delimiter: str = " ", quotechar: str = "|") -> List[Document]:
if not path:
raise ValueError("No path provided")

if not path.exists():
raise FileNotFoundError(f"Could not find file: {path}")
def read(self, file: Union[Path, IO[Any]], delimiter: str = ",", quotechar: str = '"') -> List[Document]:
if not file:
raise ValueError("No file provided")

try:
logger.info(f"Reading: {path}")
csv_name = path.name.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
if isinstance(file, Path):
if not file.exists():
raise FileNotFoundError(f"Could not find file: {file}")
logger.info(f"Reading: {file}")
file_content = file.open(newline="", mode="r", encoding="utf-8")
else:
logger.info(f"Reading uploaded file: {file.name}")
file.seek(0)
file_content = io.StringIO(file.read().decode("utf-8"))

csv_name = Path(file.name).stem if isinstance(file, Path) else file.name.split(".")[0]
csv_content = ""
with open(path, newline="") as csvfile:
with file_content as csvfile:
csv_reader = csv.reader(csvfile, delimiter=delimiter, quotechar=quotechar)
for row in csv_reader:
csv_content += ", ".join(row)
csv_content += ", ".join(row) + "\n"

documents = [
Document(
name=csv_name,
Expand All @@ -39,5 +46,5 @@ def read(self, path: Path, delimiter: str = " ", quotechar: str = "|") -> List[D
return chunked_documents
return documents
except Exception as e:
logger.error(f"Error reading: {path}: {e}")
return []
logger.error(f"Error reading: {file.name if isinstance(file, IO) else file}: {e}")
return []
37 changes: 19 additions & 18 deletions phi/document/reader/docx.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,36 @@
from pathlib import Path
from typing import List

from typing import List, Union
from phi.document.base import Document
from phi.document.reader.base import Reader
from phi.utils.log import logger
import io
from docx import Document as DocxDocument


class DocxReader(Reader):
"""Reader for Doc/Docx files"""

def read(self, path: Path) -> List[Document]:
if not path:
raise ValueError("No path provided")

if not path.exists():
raise FileNotFoundError(f"Could not find file: {path}")
def read(self, file: Union[Path, io.BytesIO]) -> List[Document]:
if not file:
raise ValueError("No file provided")

try:
import textract # noqa: F401
except ImportError:
raise ImportError("`textract` not installed")
if isinstance(file, Path):
logger.info(f"Reading: {file}")
docx_document = DocxDocument(file)
doc_name = file.stem
else: # Handle file-like object from upload
logger.info(f"Reading uploaded file: {file.name}")
docx_document = DocxDocument(file)
doc_name = file.name.split(".")[0]

doc_content = "\n\n".join([para.text for para in docx_document.paragraphs])

try:
logger.info(f"Reading: {path}")
doc_name = path.name.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
doc_content = textract.process(path)
documents = [
Document(
name=doc_name,
id=doc_name,
content=doc_content.decode("utf-8"),
content=doc_content,
)
]
if self.chunk:
Expand All @@ -39,5 +40,5 @@ def read(self, path: Path) -> List[Document]:
return chunked_documents
return documents
except Exception as e:
logger.error(f"Error reading: {path}: {e}")
return []
logger.error(f"Error reading file: {e}")
return []
Loading

0 comments on commit 279e22e

Please sign in to comment.