File tree 5 files changed +125
-0
lines changed
5 files changed +125
-0
lines changed Original file line number Diff line number Diff line change
1
+ # -*- coding: utf-8 -*-
2
+ from HTMLParser import HTMLParser
3
+ import urllib2
4
+
5
class myParser(HTMLParser):
    """HTML parser that collects the ``href`` value of every ``<a>`` start tag.

    After ``feed()`` has been called, the collected targets are available in
    ``self.links`` in document order.
    """

    def __init__(self):
        # Explicit base-class call rather than super(): on Python 2,
        # HTMLParser is an old-style class and super() does not work on it.
        HTMLParser.__init__(self)
        self.flag = 0  # not read anywhere in this class; kept for compatibility
        self.links = []

    def handle_starttag(self, tag, attrs):
        """Record the ``href`` of an anchor tag.

        Args:
            tag: name of the start tag as reported by the parser.
            attrs: list of ``(name, value)`` attribute pairs for the tag.
        """
        if tag != "a":
            return
        for name, value in attrs:
            # A bare ``<a href>`` is reported with value None; there is no
            # target to record, so keep None out of the result list.
            if name == "href" and value is not None:
                self.links.append(value)
16
+
17
+
18
if __name__ == "__main__":
    # Fetch one page and print every link target found in its <a> tags.
    parser = myParser()
    myurl = 'http://www.baidu.com'
    html = urllib2.urlopen(myurl)
    try:
        htmlcode = html.read()
    finally:
        # urllib2 responses are not context managers on Python 2, so the
        # connection is released explicitly even if read() fails.
        html.close()
    parser.feed(htmlcode)
    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print(parser.links)
25
+
Original file line number Diff line number Diff line change
1
+ 北京
2
+ 程序员
3
+ 公务员
4
+ 领导
5
+ 牛比
6
+ 牛逼
7
+ 你娘
8
+ 你妈
9
+ love
10
+ sex
11
+ jiangge
Original file line number Diff line number Diff line change
1
+ # -*-coding:utf-8-*-
2
+ import string
3
+
4
class senseWord():
    """Exact-match checker against the word list in ``filtered_word.txt``.

    Each line of the file becomes one entry of ``self.list``.
    """

    def __init__(self):
        """Load the word list and print every entry.

        The file is read as UTF-8 and each entry is re-encoded to GBK
        (presumably to match the console input encoding -- confirm).
        """
        # ``with`` closes the file even if a line fails to decode/encode.
        with open('filtered_word.txt', 'r') as inputfile:
            self.list = [
                line.decode('utf-8').encode('gbk').strip()
                for line in inputfile
            ]
        for item in self.list:
            print(item)

    def checkWord(self, word):
        """Return True if ``word`` equals one of the loaded entries exactly."""
        return word in self.list
19
+
20
if __name__ == '__main__':
    # Read lines from standard input until an empty line (or end of input)
    # and report whether each one is an exact match for a filtered word.
    myCheck = senseWord()
    while True:
        # Every line is checked, including the first one; no input is
        # read and thrown away before the loop starts.
        try:
            ipstr = raw_input()
        except EOFError:
            # End of input is treated like an empty line: stop quietly.
            break
        if not ipstr:
            break
        if myCheck.checkWord(ipstr):
            print('Freedom')
        else:
            print('humanRight')
32
+
33
+
Original file line number Diff line number Diff line change
1
+ # -*-coding:utf-8-*-
2
+ import string
3
class senseWord():
    """Substring checker against the word list in ``filtered_word.txt``.

    ``self.list`` holds the loaded entries; ``self.word`` holds the entries
    found by the most recent ``checkWord`` call.
    """

    def __init__(self):
        """Load the word list.

        The file is read as UTF-8 and each entry is re-encoded to GBK
        (presumably to match the console input encoding -- confirm).
        """
        self.word = []
        # ``with`` closes the file even if a line fails to decode/encode.
        with open('filtered_word.txt', 'r') as inputfile:
            stripped = (
                line.decode('utf-8').encode('gbk').strip()
                for line in inputfile
            )
            # Blank lines would become '' entries, and '' is a substring of
            # every input, so they are dropped here.
            self.list = [entry for entry in stripped if entry]

    def checkWord(self, word):
        """Return True if any loaded entry occurs inside ``word``.

        The matching entries are stored for ``getWord``. The stored matches
        are replaced on every call, so results from earlier inputs do not
        accumulate.
        """
        self.word = [
            entry for entry in self.list
            if entry and entry in word
        ]
        return bool(self.word)

    def getWord(self):
        """Return the entries matched by the most recent ``checkWord`` call."""
        return self.word
24
+
25
if __name__ == '__main__':
    # Read lines from standard input until an empty line (or end of input)
    # and echo each one with every filtered word replaced by asterisks.
    myCheck = senseWord()
    while True:
        try:
            ipstr = raw_input()
        except EOFError:
            # End of input is treated like an empty line: stop quietly.
            break
        if not ipstr:
            break
        if myCheck.checkWord(ipstr):
            for items in myCheck.getWord():
                if not items:
                    # Replacing '' would insert the mask between every
                    # character of the line.
                    continue
                # One asterisk per character, not per byte: entries are GBK
                # byte strings, so decode before measuring.
                length = len(items.decode('gbk'))
                ipstr = ipstr.replace(items, '*' * length)
        print(ipstr)
43
+
44
+
45
+
Original file line number Diff line number Diff line change
1
+ 北京
2
+ 程序员
3
+ 公务员
4
+ 领导
5
+ 牛比
6
+ 牛逼
7
+ 你娘
8
+ 你妈
9
+ love
10
+ sex
11
+ jiangge
You can’t perform that action at this time.
0 commit comments