Skip to content

Commit

Permalink
add another demo
Browse files Browse the repository at this point in the history
  • Loading branch information
刘志军 committed Jan 14, 2019
1 parent 372b0e3 commit b9d4a0d
Show file tree
Hide file tree
Showing 3 changed files with 5,276 additions and 0 deletions.
26 changes: 26 additions & 0 deletions fencitongji/fencitongji.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# -*- coding:utf-8 -*-
import pkuseg
from collections import Counter
import pprint

# User-dictionary entries handed to the segmenter.
LEXICON = ['小程序', '朋友圈', '公众号']

# How many of the most frequent words are printed.
TOP_N = 50


def read_text(path):
    """Return the entire contents of the UTF-8 text file at *path*."""
    with open(path, encoding="utf-8") as f:
        return f.read()


def parse_stopwords(raw):
    """Turn the raw text of a stop-word file into a set of stop words.

    Entries are split on any whitespace, which covers a one-word-per-line
    file. NOTE(review): the layout of stopword.txt is not visible from this
    script -- confirm that no entry contains internal spaces.

    A set is returned so that membership tests compare whole words. Testing
    ``word in raw`` against the unsplit string is a substring search and
    would also reject every fragment of every stop word.
    """
    return set(raw.split())


def remove_stopwords(words, stopwords):
    """Return the items of *words* that are neither blank nor stop words.

    Order and duplicates are preserved so the result can be counted.
    Whitespace-only tokens are dropped explicitly; they carry no meaning in
    a word-frequency table.
    """
    return [w for w in words if w.strip() and w not in stopwords]


def top_words(words, n=TOP_N):
    """Return the *n* most frequent items of *words* as (word, count) pairs."""
    return Counter(words).most_common(n)


def main():
    """Segment yanjiang.txt, drop stop words, and print the top words."""
    content = read_text("yanjiang.txt")

    # Load the model with the user dictionary.
    seg = pkuseg.pkuseg(user_dict=LEXICON)
    text = seg.cut(content)

    stopwords = parse_stopwords(read_text("stopword.txt"))
    new_text = remove_stopwords(text, stopwords)

    pprint.pprint(top_words(new_text))


if __name__ == "__main__":
    main()
Loading

0 comments on commit b9d4a0d

Please sign in to comment.