Skip to content

Commit

Permalink
Refactored regex
Browse files (browse the repository at this point in the history)
  • Loading branch information
frandier committed Oct 11, 2022
1 parent 2d95686 commit 95c9cea
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 7 deletions.
11 changes: 5 additions & 6 deletions cleaner/cleaner.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,10 +3,9 @@
 class Cleaner:
     def clean_tweet(self, tweet):
         r = tweet.lower()
-        r = re.sub("@[A-Za-z0-9_]+","", r)
-        r = re.sub("#[A-Za-z0-9_]+","", r)
-        r = re.sub(r'http\S+', '', r)
-        r = re.sub('[()!?]', ' ', r)
-        r = re.sub('\[.*?\]',' ', r)
-        r = re.sub("[^a-z0-9]"," ", r)
+        r = re.sub("(@[A-Za-z0-9_]+)", ' ', r)
+        r = re.sub(r'((?<=[A-Za-z])(?=[A-Z][a-z]))',' ', r)
+        r = re.sub("([^A-Za-z0-9äÄëËïÏöÖüÜáéíóúáéíóúÁÉÍÓÚÂÊÎÔÛâêîôûàèìòùÀÈÌÒÙñÑ])",' ', r)
+        r = re.sub("(\w+:\/\/\S+)",' ', r)
+        r = ' '.join(r.split())
         return r
3 changes: 2 additions & 1 deletion main.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,7 +17,8 @@ def main():

     try:
         query = str(input("Enter a search query: "))
-        tweets = client.get_tweets(query, 1, "4.570868,-74.297333,100km")
+        total = int(input("Enter the number of tweets to collect: "))
+        tweets = client.get_tweets(query, total, "4.570868,-74.297333,100km")
     except:
         print("Could not get tweets")
         sys.exit(1)
Expand Down

0 comments on commit 95c9cea

Please sign in to comment.