Skip to content

Commit

Permalink
Save unique tweets
Browse files: browse the repository at this point in the history
  • Loading branch information
frandier committed Nov 10, 2022
1 parent 611b8a1 commit 315d1ba
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 2 deletions.
6 changes: 4 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
import spacy
from dotenv import load_dotenv
from pymongo import MongoClient
from pymongo import MongoClient, ASCENDING
from sklearn.feature_extraction.text import TfidfVectorizer
from art import tprint

Expand Down Expand Up @@ -73,6 +73,7 @@ def new_database():
try:
client = MongoClient(os.environ.get("mongo_uri"))
db = client.twitter
db.tweets.create_index([('id', ASCENDING)], unique=True)
return db
except:
print("Could not connect to MongoDB")
Expand All @@ -90,6 +91,7 @@ def get_tweets_from_twitter(client):
def save_tweets(db, tweets):
for tweet in tweets:
tw = {
"id": tweet.id,
"text": tweet.text,
"user": tweet.user.screen_name,
"location": tweet.user.location,
Expand All @@ -100,7 +102,7 @@ def save_tweets(db, tweets):
tw_id = db.tweets.insert_one(tw).inserted_id
print("Tweet inserted with id: ", tw_id)
except:
print("Could not insert tweet")
print("Could not insert tweet", tweet.id)

def clean_and_normalize_tweets(db, clean, nlp):
tweets = db.tweets.find()
Expand Down
40 changes: 40 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,51 @@
ansiwrap==0.8.4
art==5.7
blis==0.7.8
catalogue==2.0.8
certifi==2022.9.24
charset-normalizer==2.1.1
click==8.1.3
confection==0.0.3
cymem==2.0.6
dnspython==2.2.1
es-core-news-md @ https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.4.0/es_core_news_md-3.4.0-py3-none-any.whl
idna==3.4
Jinja2==3.1.2
joblib==1.2.0
langcodes==3.3.0
MarkupSafe==2.1.1
murmurhash==1.0.8
nltk==3.7
numpy==1.23.3
oauthlib==3.2.1
packaging==21.3
pandas==1.5.0
pathy==0.6.2
preshed==3.0.7
pydantic==1.9.2
pymongo==4.2.0
pyparsing==3.0.9
python-dateutil==2.8.2
python-dotenv==0.21.0
pytz==2022.4
regex==2022.9.13
requests==2.28.1
requests-oauthlib==1.3.1
scikit-learn==1.1.2
scipy==1.9.2
six==1.16.0
sklearn==0.0
smart-open==5.2.1
spacy==3.4.1
spacy-legacy==3.0.10
spacy-loggers==1.0.3
srsly==2.4.4
textwrap3==0.9.2
thinc==8.1.3
threadpoolctl==3.1.0
tqdm==4.64.1
tweepy==4.10.1
typer==0.4.2
typing_extensions==4.4.0
urllib3==1.26.12
wasabi==0.10.1

0 comments on commit 315d1ba

Please sign in to comment.