Skip to content

Commit

Permalink
✨ add xlsx and xls parser (QuivrHQ#997)
Browse files Browse the repository at this point in the history
  • Loading branch information
gozineb authored Aug 21, 2023
1 parent 5a3a6fe commit 3821502
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 2 deletions.
19 changes: 19 additions & 0 deletions backend/parsers/xlsx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from langchain.document_loaders import UnstructuredExcelLoader
from models.files import File

from .common import process_file


def process_xlsx(
    file: File,
    enable_summarization,
    brain_id,
    user_openai_api_key,
):
    """Hand an Excel file to the shared ``process_file`` pipeline.

    This is a thin adapter: it selects ``UnstructuredExcelLoader`` as the
    loader class and forwards every other argument to ``process_file``
    unchanged.

    Args:
        file: The file object to process.
        enable_summarization: Forwarded as-is to ``process_file``.
        brain_id: Forwarded as-is to ``process_file``.
        user_openai_api_key: Forwarded as-is to ``process_file``.

    Returns:
        Whatever ``process_file`` returns for these arguments.
    """
    # Collect the keyword arguments in one place so the only thing specific
    # to this parser (the loader class) is easy to spot.
    forwarded_kwargs = {
        "file": file,
        "loader_class": UnstructuredExcelLoader,
        "enable_summarization": enable_summarization,
        "brain_id": brain_id,
        "user_openai_api_key": user_openai_api_key,
    }
    return process_file(**forwarded_kwargs)
5 changes: 3 additions & 2 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ pypdf==3.9.0
StrEnum==0.4.15
supabase==1.0.3
tiktoken==0.4.0
unstructured==0.6.5
unstructured==0.6.7
fastapi==0.95.2
python-multipart==0.0.6
uvicorn==0.22.0
Expand All @@ -26,4 +26,5 @@ psycopg2-binary==2.9.6
sqlalchemy==2.0.19
html5lib==1.1
bs4==0.0.1
newspaper3k
newspaper3k
xlrd==1.0.0
3 changes: 3 additions & 0 deletions backend/utils/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from parsers.pdf import process_pdf
from parsers.powerpoint import process_powerpoint
from parsers.txt import process_txt
from parsers.xlsx import process_xlsx

file_processors = {
".txt": process_txt,
Expand All @@ -29,6 +30,8 @@
".pptx": process_powerpoint,
".docx": process_docx,
".odt": process_odt,
".xlsx": process_xlsx,
".xls": process_xlsx,
".epub": process_epub,
".ipynb": process_ipnyb,
}
Expand Down

0 comments on commit 3821502

Please sign in to comment.