Skip to content

Commit

Permalink
fix sorting and normalization: wasn't applied if frequencies were given directly.
Browse files Browse the repository at this point in the history
  • Loading branch information
amueller committed Aug 18, 2015
1 parent 1f63d4a commit 943ede2
Showing 1 changed file with 16 additions and 12 deletions.
28 changes: 16 additions & 12 deletions wordcloud/wordcloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@

item1 = itemgetter(1)

FONT_PATH = os.environ.get("FONT_PATH", os.path.join(os.path.dirname(__file__), "DroidSansMono.ttf"))
FONT_PATH = os.environ.get("FONT_PATH", os.path.join(os.path.dirname(__file__),
"DroidSansMono.ttf"))
STOPWORDS = set([x.strip() for x in open(os.path.join(os.path.dirname(__file__),
'stopwords')).read().split('\n')])

Expand Down Expand Up @@ -215,6 +216,15 @@ def generate_from_frequencies(self, frequencies):
self
"""
# make sure frequencies are sorted and normalized
frequencies = sorted(frequencies, key=lambda x: x[1], reverse=True)
frequencies = frequencies[:self.max_words]
# largest entry will be 1
max_frequency = np.max([freq for word, freq in frequencies])

for i, (word, freq) in enumerate(frequencies):
frequencies[i] = word, freq / max_frequency

if self.random_state is not None:
random_state = self.random_state
else:
Expand Down Expand Up @@ -361,35 +371,29 @@ def process_text(self, text):
d3[key_singular] = val_singular + val_plural
del d3[key]

words = sorted(d3.items(), key=item1, reverse=True)
words = words[:self.max_words]
maximum = float(max(d3.values()))
for i, (word, count) in enumerate(words):
words[i] = word, count / maximum

self.words_ = words
self.words_ = d3.items()

return words
return self.words_

def generate_from_text(self, text):
"""Generate wordcloud from text.
Calls process_text and fit_words.
Calls process_text and generate_from_frequencies.
Returns
-------
self
"""
self.process_text(text)
self.fit_words(self.words_)
self.generate_from_frequencies(self.words_)
return self

def generate(self, text):
"""Generate wordcloud from text.
Alias to generate_from_text.
Calls process_text and fit_words.
Calls process_text and generate_from_frequencies.
Returns
-------
Expand Down

0 comments on commit 943ede2

Please sign in to comment.