Skip to content

Commit

Permalink
add heart
Browse files Browse the repository at this point in the history
  • Loading branch information
lzjun567 committed Feb 15, 2017
1 parent e95494b commit c985586
Show file tree
Hide file tree
Showing 33 changed files with 940 additions and 16,751 deletions.
Binary file removed agone-Heart.png
Binary file not shown.
Empty file added blog/__init__.py
Empty file.
File renamed without changes.
File renamed without changes.
Empty file added heart/__init__.py
Empty file.
File renamed without changes
Binary file added heart/heart.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
87 changes: 87 additions & 0 deletions heart/heart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# -*- coding:utf-8 -*-
import codecs
import csv
import re

import jieba.analyse
import matplotlib.pyplot as plt
import requests
from scipy.misc import imread
from wordcloud import WordCloud

__author__ = 'liuzhijun'

# Cookies attached to every request issued by fetch_weibo().
# NOTE(review): the values look like redacted placeholders; real session
# cookies presumably have to be filled in before the script can fetch
# anything -- confirm, and avoid committing live credentials.
cookies = {
    "ALF": "xxxx",
    "SCF": "xxxxxx.",
    "SUBP": "xxxxx",
    "SUB": "xxxx",
    "SUHB": "xxx-",
    "xx": "xx",
    "_T_WM": "xxx",
    "gsScrollPos": "",
    "H5_INDEX": "0_my",
    "H5_INDEX_TITLE": "xxx",
    "M_WEIBOCN_PARAMS": "xxxx",
}


def fetch_weibo():
api = "http://m.weibo.cn/index/my?format=cards&page=%s"
for i in range(1, 102):
response = requests.get(url=api % i, cookies=cookies)
data = response.json()[0]
groups = data.get("card_group") or []
for group in groups:
text = group.get("mblog").get("text")
text = text.encode("utf-8")

def cleanring(content):
"""
去掉无用字符
"""
pattern = "<a .*?/a>|<i .*?/i>|转发微博|//:|Repost|,|?|。|、|分享图片"
content = re.sub(pattern, "", content)
return content

text = cleanring(text).strip()
if text:
yield text


def write_csv(texts, path='./weibo.csv'):
    """Write ``texts`` to a one-column CSV file with a ``text`` header.

    Rows are written one at a time while ``texts`` is consumed, so a lazy
    iterable is never materialised as a whole.

    :param texts: iterable of strings, one per output row.
    :param path: destination file; defaults to ``./weibo.csv``.
    """
    # The csv module does its own line-ending handling: it needs a binary
    # file on Python 2 and a text file opened with newline='' on Python 3.
    # Plain 'w' would produce "\r\r\n" line endings on Windows.
    if str is bytes:
        mode, open_kwargs = 'wb', {}
    else:
        # The encoding is deliberately left at the platform default so the
        # file stays readable by read_csv(), which opens it the same way.
        mode, open_kwargs = 'w', {'newline': ''}

    with open(path, mode, **open_kwargs) as f:
        writer = csv.DictWriter(f, fieldnames=["text"])
        writer.writeheader()
        for text in texts:
            writer.writerow({"text": text})


def read_csv(path='./weibo.csv'):
    """Yield the ``text`` column of every row of a CSV file.

    The first line of the file is used as the header. The file stays open
    until the generator is exhausted or closed.

    :param path: file to read; defaults to ``./weibo.csv``.
    :return: generator of the ``text`` value of each row.
    :raises KeyError: if the file has no ``text`` column.
    """
    # The csv module does its own line-ending handling: it needs a binary
    # file on Python 2 and a text file opened with newline='' on Python 3.
    if str is bytes:
        mode, open_kwargs = 'rb', {}
    else:
        mode, open_kwargs = 'r', {'newline': ''}

    with open(path, mode, **open_kwargs) as f:
        for row in csv.DictReader(f):
            yield row['text']


def word_segment(texts):
    """Yield, for each text, its jieba tags joined by single spaces.

    ``./stopwords.txt`` is registered as jieba's stop-word file when the
    generator is first advanced, before any text is processed.

    :param texts: iterable of strings to analyse.
    :return: generator of space-separated tag strings, one per input text.
    """
    jieba.analyse.set_stop_words("./stopwords.txt")
    for sentence in texts:
        # topK=20 is passed through to jieba; presumably it caps the number
        # of keywords returned per text -- confirm against the jieba docs.
        keywords = jieba.analyse.extract_tags(sentence, topK=20)
        joined = " ".join(keywords)
        yield joined


def generate_img(texts):
    """Render a word cloud of ``texts`` and save it to ``./heart.jpg``.

    All texts are joined with single spaces, laid out by ``WordCloud``
    using ``./heart-mask.jpg`` as the mask on a white background, drawn
    with matplotlib with the axes hidden, and saved at 600 dpi.

    :param texts: iterable of strings (already segmented into words).
    """
    corpus = " ".join(texts)

    # flatten=True asks imread for a single gray-scale layer.
    # NOTE(review): scipy.misc.imread was removed in later SciPy releases;
    # confirm the SciPy version this script is pinned to.
    heart_mask = imread('./heart-mask.jpg', flatten=True)

    # NOTE(review): 'msyh.ttc' is presumably a CJK-capable font file that
    # must be resolvable on the machine running this -- verify.
    cloud_builder = WordCloud(
        font_path='msyh.ttc',
        background_color='white',
        mask=heart_mask,
    )
    cloud = cloud_builder.generate(corpus)

    plt.imshow(cloud)
    plt.axis('off')
    plt.savefig('./heart.jpg', dpi=600)


if __name__ == '__main__':
    # Stage 1: download the posts and persist them to the CSV file.
    write_csv(fetch_weibo())

    # Stage 2: read the CSV back, extract keywords and draw the word cloud.
    segmented = word_segment(read_csv())
    generate_img(segmented)
File renamed without changes.
853 changes: 853 additions & 0 deletions heart/weibo.csv

Large diffs are not rendered by default.

Binary file removed my_twitter_wordcloud_1.png
Binary file not shown.
Binary file removed my_twitter_wordcloud_10.png
Binary file not shown.
Binary file removed my_twitter_wordcloud_11.png
Binary file not shown.
Binary file removed my_twitter_wordcloud_12.png
Binary file not shown.
Binary file removed my_twitter_wordcloud_13.png
Binary file not shown.
Binary file removed my_twitter_wordcloud_2.png
Binary file not shown.
Binary file removed my_twitter_wordcloud_3.png
Binary file not shown.
Binary file removed my_twitter_wordcloud_4.png
Binary file not shown.
Binary file removed my_twitter_wordcloud_5.png
Binary file not shown.
Empty file added pdf/__init__.py
Empty file.
File renamed without changes
File renamed without changes.
File renamed without changes.
4,225 changes: 0 additions & 4,225 deletions test.html

This file was deleted.

4,342 changes: 0 additions & 4,342 deletions test2.html

This file was deleted.

4,343 changes: 0 additions & 4,343 deletions test3.html

This file was deleted.

1,730 changes: 0 additions & 1,730 deletions test4.html

This file was deleted.

Binary file removed twitter_mask.png
Binary file not shown.
936 changes: 0 additions & 936 deletions weibo.csv

This file was deleted.

179 changes: 0 additions & 179 deletions weibo.py

This file was deleted.

Loading

0 comments on commit c985586

Please sign in to comment.