forked from Show-Me-the-Code/python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimportant_word.py
43 lines (39 loc) · 1.15 KB
/
important_word.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# -*- coding: utf-8 -*-
import os
import re
from collections import Counter
# Get all files in designated path
def get_files(path):
    """Recursively collect the paths of all regular files under ``path``.

    Args:
        path: Directory to scan.

    Returns:
        A list of file paths, each formed by joining ``path`` with the
        names found beneath it. Entries appear in ``os.listdir`` order,
        with the contents of a subdirectory placed where that
        subdirectory was listed.

    Raises:
        OSError: If ``path`` or one of its subdirectories cannot be listed.
    """
    files = []
    for entry in os.listdir(path):
        # os.path.join avoids doubled separators when ``path`` already
        # ends with one and uses the platform's separator.
        entry_path = os.path.join(path, entry)
        if os.path.isfile(entry_path):
            files.append(entry_path)
        elif os.path.isdir(entry_path):
            files.extend(get_files(entry_path))
        # Entries that are neither a file nor a directory are skipped.
    return files
# Get the most popular word in designated files
def get_important_word(files):
    """Count word occurrences across ``files`` and rank them by frequency.

    A word is a maximal run of ASCII letters and digits; matching is
    case-sensitive.

    Args:
        files: Iterable of paths of the files to read.

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count.
        The list is empty when no words are found.

    Raises:
        IOError / OSError: If one of the files cannot be opened or read.
    """
    word_pattern = re.compile(r'[a-zA-Z0-9]+')
    counts = Counter()
    for filename in files:
        # The context manager closes the file even if reading fails.
        with open(filename, 'rb') as f:
            data = f.read()
        # On Python 3 a binary read yields bytes, which a str pattern
        # cannot search. latin-1 maps every byte to one code point, so
        # decoding never fails and the ASCII words are left unchanged.
        if not isinstance(data, str):
            data = data.decode('latin-1')
        counts.update(word_pattern.findall(data))
    # most_common() with no argument returns every (word, count) pair,
    # highest count first.
    return counts.most_common()
if __name__ == '__main__':
    # Scan the current directory tree and report its most frequent word(s).
    files = get_files('.')
    print(files)
    wordsort = get_important_word(files)
    # Several words may share the highest count, so print every entry
    # that ties with the first one. The list is sorted by descending
    # count, so the ties form a prefix. Nothing is printed when no words
    # were found.
    if wordsort:
        top_count = wordsort[0][1]
        for entry in wordsort:
            if entry[1] != top_count:
                break
            print(entry)