Skip to content

Commit

Permalink
add requirements.txt to replace conda-.yml documentation; remove 2 lines from scraper to ensure compatibility
Browse files Browse the repository at this point in the history
  • Loading branch information
raphaelzhou1 committed Nov 10, 2023
1 parent f1a63f1 commit e4b77a2
Show file tree
Hide file tree
Showing 9 changed files with 2,648 additions and 19 deletions.
2 changes: 1 addition & 1 deletion fingpt/FinGPT_RAG/multisource_retrieval/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@

## Setup

* Visit environment_news_scraping.yml for the environment setup
* Set up your .env file, can refer to /FinGPT_sentiment/.env.example
* Visit FinGPT_RAG/requirements.txt for the environment setup

``` python

Expand Down
Empty file.
2,487 changes: 2,487 additions & 0 deletions fingpt/FinGPT_RAG/multisource_retrieval/data/sent_valid_scraped.csv

Large diffs are not rendered by default.

15 changes: 6 additions & 9 deletions fingpt/FinGPT_RAG/multisource_retrieval/news_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from scrapers.cnbc import scrape_cnbc
from scrapers.market_screener import scrape_market_screener
from scrapers import url_encode
from scrapers.google.scrape_google import scrape_google

# TODO: Twitter API requests # https://twitter.com/bryan4665/

Expand Down Expand Up @@ -607,24 +608,20 @@ def select_column_and_classify():
choices=column_names)
if not sentence_column:
raise ValueError("Invalid context selected selection")
classification_column = gui.buttonbox("Column Selection",
"Select the column for classification in the CSV:",
choices=column_names)
if not classification_column:
raise ValueError("Invalid context classification column selection")

counter = 0 # Counter variable to track the number of rows processed
row_index_input = gui.enterbox("Enter the row index to classify", "Row Index Input")
row_index_input = gui.enterbox("Enter the row index to classify", "Row Index Input", 1)
if row_index_input is None or not row_index_input.isdigit() or int(row_index_input) >= len(df):
row_index = 1 # Set a default starting index
else:
row_index = int(row_index_input)

print("loaded file as df: ", df)

for row_index, row in itertools.islice(df.iterrows(), row_index, None):
# If role is not empty or N/A or has the same sentence as "contextualized_sentence", means context is added, then skip
if process_existing_file and row["link"] != "N/A" and not pd.isnull(row["link"]) and row[sentence_column] != row["contextualized_sentence"]:
continue

# if process_existing_file and row["link"] != "N/A" and not pd.isnull(row["link"]) and row[sentence_column] != row["contextualized_sentence"]:
# continue
target_sentence = row[sentence_column]
ticker, remaining_sentence, link = split_sentence(target_sentence)

Expand Down
Empty file.
Empty file.
163 changes: 154 additions & 9 deletions fingpt/FinGPT_RAG/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,154 @@
tokenizers>=0.13.3
bitsandbytes
datasets>=2.8.0
sentencepiece>=0.1.97
protobuf==3.20.3
accelerate>=0.15.0
torch>=1.12.0
deepspeed>=0.9.0
git+https://github.com/huggingface/transformers
accelerate==0.23.0
aiohttp==3.8.5
aiosignal==1.3.1
anyio==4.0.0
appnope==0.1.3
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asttokens==2.4.0
async-lru==2.0.4
async-timeout==4.0.3
attrs==23.1.0
Babel==2.12.1
backcall==0.2.0
beautifulsoup4==4.12.2
bleach==6.0.0
bs4==0.0.1
certifi==2023.7.22
cffi==1.16.0
charset-normalizer==3.3.0
comm==0.1.4
datasets==2.14.5
debugpy==1.8.0
decorator==5.1.1
defusedxml==0.7.1
dill==0.3.7
easygui==0.98.2
executing==2.0.0
fastjsonschema==2.18.1
filelock==3.12.4
fqdn==1.5.1
frozenlist==1.4.0
fsspec==2023.6.0
h11==0.14.0
huggingface-hub==0.16.4
idna==3.4
ipykernel==6.25.2
ipython==8.16.0
ipython-genutils==0.2.0
ipywidgets==8.1.1
isoduration==20.11.0
jedi==0.19.0
Jinja2==3.1.2
joblib==1.3.2
json5==0.9.14
jsonpointer==2.4
jsonschema==4.19.1
jsonschema-specifications==2023.7.1
jupyter==1.0.0
jupyter-console==6.6.3
jupyter-events==0.7.0
jupyter-lsp==2.2.0
jupyter_client==8.3.1
jupyter_core==5.3.2
jupyter_server==2.7.3
jupyter_server_terminals==0.4.4
jupyterlab==4.0.6
jupyterlab-pygments==0.2.2
jupyterlab-widgets==3.0.9
jupyterlab_server==2.25.0
loguru==0.7.2
lxml==4.9.3
MarkupSafe==2.1.3
matplotlib-inline==0.1.6
mistune==3.0.2
mpmath==1.3.0
multidict==6.0.4
multiprocess==0.70.15
nbclient==0.8.0
nbconvert==7.8.0
nbformat==5.9.2
nest-asyncio==1.5.8
networkx==3.1
notebook==7.0.4
notebook_shim==0.2.3
numpy==1.26.0
oauthlib==3.2.2
openai==0.28.1
outcome==1.2.0
overrides==7.4.0
packaging==23.2
pandas==2.1.1
pandocfilters==1.5.0
parso==0.8.3
peft==0.5.0
pexpect==4.8.0
pickleshare==0.7.5
platformdirs==3.10.0
prometheus-client==0.17.1
prompt-toolkit==3.0.39
psutil==5.9.5
ptyprocess==0.7.0
pure-eval==0.2.2
pyarrow==13.0.0
pycparser==2.21
Pygments==2.16.1
PySocks==1.7.1
python-dateutil==2.8.2
python-dotenv==1.0.0
python-json-logger==2.0.7
pytz==2023.3.post1
PyYAML==6.0.1
pyzmq==25.1.1
qtconsole==5.4.4
QtPy==2.4.0
referencing==0.30.2
regex==2023.10.3
requests==2.31.0
requests-oauthlib==1.3.1
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rpds-py==0.10.3
safetensors==0.3.3
scikit-learn==1.3.1
scipy==1.11.3
searchtweets==1.7.6
selenium==4.13.0
Send2Trash==1.8.2
simplejson==3.19.1
six==1.16.0
sniffio==1.3.0
sortedcontainers==2.4.0
soupsieve==2.5
stack-data==0.6.3
sympy==1.12
terminado==0.17.1
threadpoolctl==3.2.0
tinycss2==1.2.1
tokenizers==0.14.0
torch==2.0.1
tornado==6.3.3
tqdm==4.66.1
traitlets==5.10.1
transformers==4.34.0
trio==0.22.2
trio-websocket==0.11.1
tushare==1.2.89
tweepy==4.14.0
tweet-parser==1.13.2
types-python-dateutil==2.8.19.14
typing_extensions==4.8.0
tzdata==2023.3
uri-template==1.3.0
urllib3==2.0.5
wcwidth==0.2.8
webcolors==1.13
webdriver-manager==4.0.1
webencodings==0.5.1
websocket-client==0.57.0
widgetsnbextension==4.0.9
wsproto==1.2.0
xxhash==3.3.0
yarl==1.9.2
zenrows==1.3.1

0 comments on commit e4b77a2

Please sign in to comment.