Skip to content

Commit

Permalink
Merge pull request fxsjy#240 from sing1ee/master
Browse files: browse the repository at this point in the history
Build a stable (sorted) iteration order for the graph
  • Loading branch information
fxsjy committed Feb 16, 2015
2 parents 49657c9 + 8b8c6c8 commit 4e05cde
Showing 1 changed file with 9 additions and 7 deletions.
16 changes: 9 additions & 7 deletions jieba/analyse/textrank.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from operator import itemgetter
import jieba.posseg as pseg


class UndirectWeightedGraph:
d = 0.85

Expand All @@ -22,21 +23,23 @@ def rank(self):
ws = collections.defaultdict(float)
outSum = collections.defaultdict(float)

giter = list(self.graph.items()) # these two lines for build stable iteration
giter.sort()
wsdef = 1.0 / (len(self.graph) or 1.0)
for n, out in self.graph.items():
for n, out in giter:
ws[n] = wsdef
outSum[n] = sum((e[2] for e in out), 0.0)

for x in xrange(10): # 10 iters
for n, inedges in self.graph.items():
for x in range(10): # 10 iters
for n, inedges in giter:
s = 0
for e in inedges:
s += e[2] / outSum[e[1]] * ws[e[1]]
ws[n] = (1 - self.d) + self.d * s

(min_rank, max_rank) = (sys.float_info[0], sys.float_info[3])

for w in itervalues(ws):
for _, w in ws.items():
if w < min_rank:
min_rank = w
elif w > max_rank:
Expand Down Expand Up @@ -64,9 +67,9 @@ def textrank(sentence, topK=10, withWeight=False, allowPOS=['ns', 'n', 'vn', 'v'
cm = collections.defaultdict(int)
span = 5
words = list(pseg.cut(sentence))
for i in xrange(len(words)):
for i in range(len(words)):
if words[i].flag in pos_filt:
for j in xrange(i + 1, i + span):
for j in range(i + 1, i + span):
if j >= len(words):
break
if words[j].flag not in pos_filt:
Expand All @@ -75,7 +78,6 @@ def textrank(sentence, topK=10, withWeight=False, allowPOS=['ns', 'n', 'vn', 'v'

for terms, w in cm.items():
g.addEdge(terms[0], terms[1], w)

nodes_rank = g.rank()
if withWeight:
tags = sorted(nodes_rank.items(), key=itemgetter(1), reverse=True)
Expand Down

0 comments on commit 4e05cde

Please sign in to comment.