Skip to content

Commit

Permalink
modify readme
Browse files Browse the repository at this point in the history
  • Loading branch information
letiantian committed Dec 1, 2014
1 parent 41c1224 commit 6d57d4d
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 10 deletions.
16 changes: 11 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,20 +110,26 @@ import codecs
from textrank4zh import TextRank4Keyword, TextRank4Sentence
text = codecs.open('./text/01.txt', 'r', 'utf-8').read()
tr4w = TextRank4Keyword(stop_words_file='./stopword.data')
tr4w.train(text=text, speech_tag_filter=True, lower=True, window=2)
tr4w = TextRank4Keyword(stop_words_file='./stopword.data')  # 导入停用词
# 使用词性过滤,文本小写,窗口为2
tr4w.train(text=text, speech_tag_filter=True, lower=True, window=2)
print '关键词:'
print '/'.join(tr4w.get_keywords(10, word_min_len=2))
# 10个关键词且每个的长度最小为2
print '/'.join(tr4w.get_keywords(10, word_min_len=2))
print '关键短语:'
print '/'.join(tr4w.get_keyphrases(keywords_num=20, min_occur_num= 2))
# 20个关键词去构造短语,短语在原文本中出现次数最少为2
print '/'.join(tr4w.get_keyphrases(keywords_num=20, min_occur_num= 2))
tr4s = TextRank4Sentence(stop_words_file='./stopword.data')
# 使用词性过滤,文本小写,使用words_all_filters生成句子之间的相似性
tr4s.train(text=text, speech_tag_filter=True, lower=True, source = 'all_filters')
print '摘要:'
print '\n'.join(tr4s.get_key_sentences(num=3))
print '\n'.join(tr4s.get_key_sentences(num=3)) # 重要性最高的三个句子
```

得到的关键词:
Expand Down
16 changes: 11 additions & 5 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,23 @@
from textrank4zh import TextRank4Keyword, TextRank4Sentence

# Demo script: read the sample text, then print its keywords, key phrases and summary.
# NOTE(review): this span comes from a rendered diff hunk, so each statement that
# appears twice is presumably the removed and the added version of one line --
# confirm against the real test.py before relying on the duplicated calls.
# NOTE(review): the file object returned by codecs.open is never explicitly closed.
text = codecs.open('./text/01.txt', 'r', 'utf-8').read()
tr4w = TextRank4Keyword(stop_words_file='./stopword.data')
tr4w.train(text=text, speech_tag_filter=True, lower=True, window=2)
tr4w = TextRank4Keyword(stop_words_file='./stopword.data') # load the stop-word list

# use part-of-speech filtering, lowercase the text, window size 2
tr4w.train(text=text, speech_tag_filter=True, lower=True, window=2)

print '关键词:'
print '/'.join(tr4w.get_keywords(10, word_min_len=2))
# 10 keywords, each at least 2 characters long
print '/'.join(tr4w.get_keywords(10, word_min_len=2))

print '关键短语:'
print '/'.join(tr4w.get_keyphrases(keywords_num=20, min_occur_num= 2))
# build phrases from 20 keywords; a phrase must occur at least 2 times in the original text
print '/'.join(tr4w.get_keyphrases(keywords_num=20, min_occur_num= 2))

tr4s = TextRank4Sentence(stop_words_file='./stopword.data')

# use part-of-speech filtering, lowercase the text, and use words_all_filters to compute the similarity between sentences
tr4s.train(text=text, speech_tag_filter=True, lower=True, source = 'all_filters')

print '摘要:'
print '\n'.join(tr4s.get_key_sentences(num=3))
print '\n'.join(tr4s.get_key_sentences(num=3)) # the three most important sentences

0 comments on commit 6d57d4d

Please sign in to comment.