Skip to content

Commit

Permalink
add a withFlag param in textrank
Browse files Browse the repository at this point in the history
  • Loading branch information
jerryday committed Oct 30, 2015
1 parent 26e339f commit 4f8ca83
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 3 deletions.
12 changes: 9 additions & 3 deletions jieba/analyse/textrank.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def rank(self):
for w in itervalues(ws):
if w < min_rank:
min_rank = w
elif w > max_rank:
if w > max_rank:
max_rank = w

for n, w in ws.items():
Expand All @@ -66,7 +66,7 @@ def pairfilter(self, wp):
return (wp.flag in self.pos_filt and len(wp.word.strip()) >= 2
and wp.word.lower() not in self.stop_words)

def textrank(self, sentence, topK=20, withWeight=False, allowPOS=('ns', 'n', 'vn', 'v')):
def textrank(self, sentence, topK=20, withWeight=False, allowPOS=('ns', 'n', 'vn', 'v'), withFlag=False):
"""
Extract keywords from sentence using TextRank algorithm.
Parameter:
Expand All @@ -75,6 +75,8 @@ def textrank(self, sentence, topK=20, withWeight=False, allowPOS=('ns', 'n', 'vn
if False, return a list of words.
- allowPOS: the allowed POS list, e.g. ['ns', 'n', 'vn', 'v'].
if the POS of a word is not in this list, the word will be filtered out.
- withFlag: only takes effect when allowPOS is non-empty.
if True, keywords are returned as pair(word, flag) objects like those from posseg.cut
if False, keywords are returned as plain words
"""
self.pos_filt = frozenset(allowPOS)
g = UndirectWeightedGraph()
Expand All @@ -87,7 +89,10 @@ def textrank(self, sentence, topK=20, withWeight=False, allowPOS=('ns', 'n', 'vn
break
if not self.pairfilter(words[j]):
continue
cm[(wp.word, words[j].word)] += 1
if allowPOS and withFlag:
cm[(wp, words[j])] += 1
else:
cm[(wp.word, words[j].word)] += 1

for terms, w in cm.items():
g.addEdge(terms[0], terms[1], w)
Expand All @@ -96,6 +101,7 @@ def textrank(self, sentence, topK=20, withWeight=False, allowPOS=('ns', 'n', 'vn
tags = sorted(nodes_rank.items(), key=itemgetter(1), reverse=True)
else:
tags = sorted(nodes_rank, key=nodes_rank.__getitem__, reverse=True)

if topK:
return tags[:topK]
else:
Expand Down
3 changes: 3 additions & 0 deletions jieba/posseg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ def __str__(self):
def __iter__(self):
return iter((self.word, self.flag))

def __lt__(self, other):
return self.word < other.word

def encode(self, arg):
return self.__unicode__().encode(arg)

Expand Down

0 comments on commit 4f8ca83

Please sign in to comment.