Skip to content

Commit 8897e65

Browse files
authored
Create fenCi.py
1 parent cb216bb commit 8897e65

File tree

1 file changed

+52
-0
lines changed

1 file changed

+52
-0
lines changed

jobSkill/fenCi.py

+52
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,52 @@
1+
from jieba import analyse
2+
from PIL import Image
3+
from wordcloud import WordCloud, ImageColorGenerator
4+
import matplotlib.pyplot as plt
5+
import numpy as np
6+
7+
# Keyword -> occurrence count; filled in by main() and read by getWordCloud().
fenCi = {}
# Not referenced anywhere in the visible code.
ciYunArray = []
9+
def main():
    """Tally the keywords extracted from every record in ``sh.txt``.

    Each non-blank line of ``sh.txt`` is split on ``&&&`` and its last field
    is handed to jieba's TF-IDF keyword extractor (``analyse.extract_tags``).
    Every returned keyword is upper-cased, dropped if it is in the filter
    set, and otherwise counted in the module-level ``fenCi`` dict.

    When the file has been consumed, the tallies are printed sorted by
    (count, keyword) in descending order, followed by the number of distinct
    keywords collected.
    """
    # Words to leave out of the tally. A set gives O(1) membership tests.
    filterWords = {
        '熟悉', '熟练', '经验', '优先', '应用开发', '相关', '工作', '开发',
        '能力', '负责', '技术', '具备', '精通', '数据', 'ETC',
    }

    # jieba keyword extraction based on the TF-IDF algorithm.
    tfidf = analyse.extract_tags

    # The context manager closes the file even if extraction raises.
    with open('sh.txt', 'r', encoding='utf-8') as job_file:
        for zpInfo in job_file:
            if zpInfo.strip() == '':
                continue
            # The detail data is separated by &&&; only the last field is used.
            infos = zpInfo.split("&&&")

            for word in tfidf(infos[-1]):
                # Normalise case once, then filter and count.
                word = word.upper()
                if word in filterWords:
                    continue
                fenCi[word] = fenCi.get(word, 0) + 1

    print(sorted(fenCi.items(), key=lambda kv: (kv[1], kv[0]), reverse=True))
    print('分出了' + str(len(fenCi)) + '了词语')
34+
35+
36+
def getWordCloud(path_img="python.jpg",
                 font_path="/System/Library/Fonts/STHeiti Light.ttc"):
    """Render the keyword tallies in ``fenCi`` as an image-masked word cloud.

    Args:
        path_img: Image file used both as the cloud's mask and as the source
            of its colours. Defaults to ``"python.jpg"``.
        font_path: Font file passed to ``WordCloud``. The default is a macOS
            system font path, so callers on other platforms should pass
            their own.

    The cloud is shown in a matplotlib window with the axes hidden.
    """
    # Read the mask into an array inside a context manager so the image file
    # handle is released straight away.
    with Image.open(path_img) as mask_file:
        background_image = np.array(mask_file)

    # Feed the counts themselves to the cloud. Joining only the keys into a
    # text would give every keyword the same weight and discard the tallies.
    wordcloud = WordCloud(
        font_path=font_path,  # font
        background_color="white",
        mask=background_image,
    ).generate_from_frequencies(fenCi)

    # Recolour the words from the mask image's own colours.
    image_colors = ImageColorGenerator(background_image)
    plt.imshow(wordcloud.recolor(color_func=image_colors), interpolation="bilinear")
    plt.axis("off")
    plt.show()
48+
49+
50+
if __name__ == '__main__':
    # Build the keyword tallies first; the word cloud is drawn from them.
    main()
    getWordCloud()

0 commit comments

Comments
 (0)