-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
7c61263
commit 4c79931
Showing
1 changed file
with
65 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
""" | ||
The script builds a wordcloud using the Python WordCloud package. | ||
""" | ||
import sys | ||
from collections import Counter | ||
|
||
from wordcloud import WordCloud | ||
import matplotlib.pyplot as plt | ||
|
||
from util import connect_to_database_server | ||
|
||
# Name of the database handed to connect_to_database_server() in main().
DATABASE = "ABCNews"
|
||
def _iter_ngrams(tokens, n):
    """Return an iterator over every run of ``n`` consecutive items of ``tokens``."""
    return zip(*(tokens[i:] for i in range(n)))


def generate_wc(conn, cur, n_g):
    """
    Display a word cloud of the 50 most frequent n-grams in the headlines.

    Runs ``select headline_text from abcnews;`` on ``cur``, lower-cases each
    row, counts its n-grams and renders the 50 most common ones with
    WordCloud in a matplotlib window (blocks in ``plt.show()``).

    N-grams are counted one headline at a time, so a phrase is never formed
    from the tail of one headline and the head of the next.

    conn : open database connection; always closed by this function.
    cur  : cursor used to run the query; always closed by this function.
    n_g  : 1, for single word, 2 for bigrams, 3 for trigrams, and so on.

    Raises ValueError if ``n_g`` is smaller than 1.
    """
    counts = Counter()

    try:
        # Validated inside the try so the handles are released even on bad input.
        if n_g < 1:
            raise ValueError("n_g must be at least 1, got {}".format(n_g))

        cur.execute("select headline_text from abcnews;")

        # iter(callable, sentinel) keeps calling fetchone() until it returns None.
        for row in iter(cur.fetchone, None):
            # Skip NULL columns instead of crashing on None.lower().
            words = " ".join(col for col in row if col is not None).lower().split()
            counts.update(_iter_ngrams(words, n_g))
    finally:
        # Release the cursor and the connection even if the query failed.
        try:
            cur.close()
        finally:
            conn.close()

    frequencies = {
        " ".join(gram): count for gram, count in counts.most_common(50)
    }

    # WordCloud raises ValueError on an empty mapping; report it plainly instead.
    if not frequencies:
        print("No {}-grams found in the queried headlines; nothing to plot.".format(n_g))
        return

    word_cloud = WordCloud(max_font_size=40, collocations=False,
                           background_color="white", width=512, height=384)
    word_cloud.generate_from_frequencies(frequencies=frequencies)

    plt.figure()
    plt.imshow(word_cloud, interpolation="bilinear")
    plt.axis("off")
    plt.show()
|
||
def main(n_g=3):
    """
    Entry-point for the script.

    Connects to the DATABASE database and shows the n-gram word cloud built
    by generate_wc.

    n_g : n-gram size passed to generate_wc (2 for bigrams, 3 for trigrams
          and so on); defaults to 3.

    Exits with a non-zero status, printing the reason to stderr, when the
    connection cannot be established.
    """
    conn_obj = connect_to_database_server(DATABASE)

    # A return value of -1 is treated as a failed connection attempt.
    if conn_obj == -1:
        # sys.exit(<str>) writes the message to stderr and exits with status 1,
        # so shells and schedulers see the failure.
        sys.exit("Connection to PostgreSQL Database: {} failed.".format(DATABASE))

    # presumably a (connection, cursor) pair - only the first two items are used.
    conn, cur = conn_obj[0], conn_obj[1]

    generate_wc(conn, cur, n_g)
|
||
# Run the script only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()
|