Skip to content

Commit

Permalink
Add function to group by hot words
Browse files Browse the repository at this point in the history
  • Loading branch information
foolcage committed Aug 1, 2023
1 parent 2a3139b commit 03aee86
Show file tree
Hide file tree
Showing 10 changed files with 273 additions and 57 deletions.
49 changes: 36 additions & 13 deletions examples/data_runner/kdata_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from examples.recorder_utils import run_data_recorder
from examples.report_utils import inform
from zvt import init_log
from zvt.api.selector import get_entity_ids_by_filter
from zvt.domain import (
Stock,
Stock1dHfqKdata,
Expand All @@ -16,6 +17,7 @@
BlockCategory,
Index,
Index1dKdata,
StockNews,
)
from zvt.informer import EmailInformer
from zvt.utils import next_date, current_date
Expand All @@ -25,6 +27,22 @@
sched = BackgroundScheduler()


@sched.scheduled_job("cron", hour=16, minute=30, day_of_week="mon-fri")
def record_stock_news(data_provider="em"):
    """Record ``StockNews`` for the stocks selected by the entity filter.

    Registered on ``sched`` as a cron job firing at 16:30, Monday to Friday.

    :param data_provider: forwarded to ``run_data_recorder`` as ``data_provider``.
        The entity filter itself always queries provider ``"em"``.
    """
    # Select the stock ids first; the recorder is then restricted to them.
    stock_ids = get_entity_ids_by_filter(
        ignore_new_stock=False,
        ignore_st=False,
        ignore_delist=True,
        provider="em",
    )

    run_data_recorder(
        domain=StockNews,
        data_provider=data_provider,
        entity_ids=stock_ids,
        day_data=True,
        force_update=False,
        sleeping_time=2,
    )


@sched.scheduled_job("cron", hour=15, minute=30, day_of_week="mon-fri")
def record_stock_data(data_provider="em", entity_provider="em", sleeping_time=2):
# A股指数
Expand All @@ -38,17 +56,6 @@ def record_stock_data(data_provider="em", entity_provider="em", sleeping_time=2)
sleeping_time=sleeping_time,
)

# A股标的
run_data_recorder(domain=Stock, data_provider=data_provider, force_update=False)
# A股后复权行情
run_data_recorder(
domain=Stock1dHfqKdata,
data_provider=data_provider,
entity_provider=entity_provider,
day_data=True,
sleeping_time=sleeping_time,
)

# 板块(概念,行业)
run_data_recorder(domain=Block, entity_provider=entity_provider, data_provider=entity_provider, force_update=False)
# 板块行情(概念,行业)
Expand Down Expand Up @@ -76,8 +83,24 @@ def record_stock_data(data_provider="em", entity_provider="em", sleeping_time=2)
title="report 新概念",
entity_provider=entity_provider,
entity_type="block",
em_group="关注板块",
em_group_over_write=True,
em_group="练气",
em_group_over_write=False,
)

# A股标的
run_data_recorder(domain=Stock, data_provider=data_provider, force_update=False)
# A股后复权行情
normal_stock_ids = get_entity_ids_by_filter(
provider="em", ignore_delist=True, ignore_st=False, ignore_new_stock=False
)

run_data_recorder(
entity_ids=normal_stock_ids,
domain=Stock1dHfqKdata,
data_provider=data_provider,
entity_provider=entity_provider,
day_data=True,
sleeping_time=sleeping_time,
)


Expand Down
44 changes: 44 additions & 0 deletions examples/hot.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"新能源": [
"新能源",
"锂电 锂电池",
"钠离子电池",
"光伏",
"太阳能",
"储能",
"TOPCON电池",
"风电",
"核电"
],
"新能车": [
"新能车 新能源汽车",
"整车 汽车整车",
"汽车零部件 汽车零件",
"无人驾驶",
"压铸一体化 一体化压铸"
],
"人工智能": [
"人工智能 AI",
"GPT CHATGPT",
"算力"
],
"机器人": [
"机器人",
"减速器",
"伺服 伺服系统",
"控制系统",
"电机"
],
"核心资产": [
"核心资产",
"白马",
"沪深300",
"基金重仓",
"上证50"
],
"人民币国际化": [
"人民币国际化",
"一带一路",
"跨境支付"
]
}
42 changes: 21 additions & 21 deletions examples/reports/report_tops.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ def report_top_stocks():
entity_type="stock",
entity_provider="em",
data_provider="em",
periods=[*range(2, 21)],
periods=[*range(2, 27)],
ignore_new_stock=True,
ignore_st=True,
adjust_type=None,
top_count=20,
top_count=25,
turnover_threshold=0,
turnover_rate_threshold=0,
informer=email_informer,
Expand All @@ -40,7 +40,7 @@ def report_top_stocks():
entity_type="stock",
entity_provider="em",
data_provider="em",
periods=[*range(21, 60)],
periods=[*range(27, 67)],
ignore_new_stock=True,
ignore_st=True,
adjust_type=None,
Expand Down Expand Up @@ -123,7 +123,7 @@ def report_top_stockhks():
entity_provider="em",
data_provider="em",
top_count=10,
periods=[*range(2, 10)],
periods=[*range(2, 27)],
ignore_new_stock=False,
ignore_st=False,
adjust_type=None,
Expand Down Expand Up @@ -152,30 +152,30 @@ def report_top_stockhks():
return_type=TopType.positive,
)

report_top_entities(
entity_type="stockhk",
entity_provider="em",
data_provider="em",
top_count=20,
periods=[365, 750],
ignore_new_stock=True,
ignore_st=False,
adjust_type=None,
turnover_threshold=50000000,
turnover_rate_threshold=0.005,
informer=email_informer,
em_group="谁有我惨",
em_group_over_write=False,
return_type=TopType.negative,
)
# report_top_entities(
# entity_type="stockhk",
# entity_provider="em",
# data_provider="em",
# top_count=20,
# periods=[365, 750],
# ignore_new_stock=True,
# ignore_st=False,
# adjust_type=None,
# turnover_threshold=50000000,
# turnover_rate_threshold=0.005,
# informer=email_informer,
# em_group="谁有我惨",
# em_group_over_write=False,
# return_type=TopType.negative,
# )


if __name__ == "__main__":
init_log("report_tops.log")

report_top_stocks()
report_top_blocks()
# report_top_stockhks()
report_top_stockhks()

sched.start()

Expand Down
17 changes: 10 additions & 7 deletions examples/reports/report_vol_up.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@


@sched.scheduled_job("cron", hour=17, minute=0, day_of_week="mon-fri")
def report_vol_up():
def report_vol_up_stocks():
target_date = get_latest_kdata_date(entity_type="stock", adjust_type=AdjustType.hfq, provider="em")
entity_ids = get_mini_and_small_stock(timestamp=target_date, provider="em")

Expand All @@ -34,7 +34,7 @@ def report_vol_up():
em_group_over_write=True,
filter_by_volume=False,
adjust_type=AdjustType.hfq,
start_timestamp="2019-01-01",
start_timestamp="2021-01-01",
# factor args
windows=[120, 250],
over_mode="or",
Expand All @@ -56,7 +56,7 @@ def report_vol_up():
em_group_over_write=False,
filter_by_volume=False,
adjust_type=AdjustType.hfq,
start_timestamp="2019-01-01",
start_timestamp="2021-01-01",
# factor args
windows=[120, 250],
over_mode="or",
Expand All @@ -66,6 +66,9 @@ def report_vol_up():
entity_ids=entity_ids,
)


@sched.scheduled_job("cron", hour=17, minute=30, day_of_week="mon-fri")
def report_vol_up_stockhks():
report_targets(
factor_cls=VolumeUpMaFactor,
entity_provider="em",
Expand All @@ -77,11 +80,11 @@ def report_vol_up():
em_group_over_write=False,
filter_by_volume=False,
adjust_type=AdjustType.hfq,
start_timestamp="2019-01-01",
start_timestamp="2021-01-01",
# factor args
windows=[120, 250],
over_mode="or",
up_intervals=20,
up_intervals=60,
turnover_threshold=100000000,
turnover_rate_threshold=0.01,
)
Expand All @@ -90,8 +93,8 @@ def report_vol_up():
if __name__ == "__main__":
init_log("report_vol_up.log")

report_vol_up()

report_vol_up_stocks()
report_vol_up_stockhks()
sched.start()

sched._thread.join()
105 changes: 105 additions & 0 deletions examples/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
# -*- coding: utf-8 -*-
import json
import logging
import os
import pprint

import eastmoneypy
import pandas as pd

from zvt.api.stats import get_top_performance_entities_by_periods
from zvt.contract.api import get_entities
from zvt.domain import StockNews
from zvt.utils import next_date, today

logger = logging.getLogger(__name__)

Expand All @@ -19,3 +28,99 @@ def add_to_eastmoney(codes, group, entity_type="stock", over_write=True):

for code in codes:
eastmoneypy.add_to_group(code=code, entity_type=entity_type, group_name=group)


def get_hot_words_config():
    """Load the hot-word configuration from ``hot.json`` next to this module.

    :return: the parsed JSON document (a mapping of topic name to a list of
        hot-word entries in the shipped ``hot.json``).
    :raises OSError: if ``hot.json`` cannot be opened.
    :raises json.JSONDecodeError: if the file is not valid JSON.
    """
    config_path = os.path.join(os.path.dirname(__file__), "hot.json")
    # The file contains Chinese text; pin UTF-8 so loading does not depend on
    # the platform's locale default encoding (e.g. cp936/cp1252 on Windows).
    with open(config_path, encoding="utf-8") as f:
        return json.load(f)


def count_hot_words(text: str):
    """Count occurrences of the configured hot words in ``text``.

    ``text`` is upper-cased before matching. Each config entry may hold several
    space-separated aliases (e.g. ``"GPT CHATGPT"``); an entry's count is the
    number of non-overlapping matches of any of its aliases, so multi-alias
    entries are matched the same way ``group_stocks_by_topic`` splits them
    instead of being searched as one literal string containing a space.

    :param text: the text to scan.
    :return: ``(topic_stats, word_stats)`` where ``topic_stats`` maps each topic
        to the sum of its entries' counts and ``word_stats`` maps each config
        entry (the unsplit string) to its count.
    """
    import re  # local import: only this function needs it

    text = text.upper()
    hot_words_config = get_hot_words_config()
    word_stats = {}
    topic_stats = {}
    for topic, entries in hot_words_config.items():
        topic_count = 0
        for words in entries:
            # Longest alias first so that "CHATGPT" is consumed as one hit
            # rather than also being counted again through its substring "GPT".
            aliases = sorted(
                {alias.upper() for alias in words.split()},
                key=lambda alias: (-len(alias), alias),
            )
            if aliases:
                pattern = "|".join(re.escape(alias) for alias in aliases)
                word_stats[words] = len(re.findall(pattern, text))
            else:
                # Blank entry in the config: nothing to search for.
                word_stats[words] = 0
            topic_count += word_stats[words]
        topic_stats[topic] = topic_count
    return topic_stats, word_stats


def hot_stats(data: pd.Series):
    """Placeholder with no implementation yet.

    The argument is currently ignored and the function returns ``None``.
    """
    return None


def group_stocks_by_topic(entities, start_timestamp=None):
    """Group stocks by the hot-word topics mentioned in their news titles.

    News titles of each entity (from ``StockNews``) are joined into one text,
    upper-cased, and scanned for every hot-word entry of ``hot.json``. An entry
    may hold several space-separated aliases; a stock is listed at most once
    per entry, with the summed hit count of all its aliases (overlapping
    aliases such as "GPT"/"CHATGPT" both contribute to that sum).

    Stocks that have news but match no entry go to the "其他" bucket. Entities
    without any news row are not part of the result.

    :param entities: objects exposing ``entity_id``, ``code`` and ``name``.
    :param start_timestamp: earliest news timestamp to query; defaults to
        ``next_date(today(), -180)``.
    :return: a list of ``(topic_label, [(entry_label, [(stock_label, hits), ...]), ...])``
        ordered by the number of matching stocks (descending), always followed
        by a final ``("其他", [("其他", [...])])`` item. Labels carry the stock
        count in parentheses, e.g. ``"机器人(3)"``.
    """
    if not start_timestamp:
        start_timestamp = next_date(today(), -180)

    stock_map = {}
    for entity in entities:
        stock_map[entity.entity_id] = {"code": entity.code, "name": entity.name}

    df = StockNews.query_data(start_timestamp=start_timestamp, entity_ids=[entity.entity_id for entity in entities])

    # Joined news titles per entity. Rows with a missing title are dropped
    # because ",".join cannot handle non-string values.
    # NOTE(review): the None check is defensive; confirm whether query_data
    # can return None when nothing is found.
    news_by_entity = {}
    if df is not None and not df.empty:
        titled = df.dropna(subset=["news_title"])
        if not titled.empty:
            news_by_entity = titled.groupby("entity_id")["news_title"].apply(",".join).to_dict()

    hot_words_config = get_hot_words_config()

    # The catch-all bucket is created up front so that the final lookup cannot
    # raise KeyError when every stock matched a topic or no news was found.
    hot_stocks_map = {"其他": []}
    topic_count = {}
    word_count = {}
    for entity_id, text in news_by_entity.items():
        # Upper-case to match the upper-case config, as count_hot_words does.
        text = text.upper()
        stock_label = f"{stock_map[entity_id]['code']}({stock_map[entity_id]['name']})"

        is_hot = False
        for topic, entries in hot_words_config.items():
            topic_count.setdefault(topic, 0)
            for words in entries:
                hot_stocks_map.setdefault(words, [])
                word_count.setdefault(words, 0)
                # Aggregate over the aliases so the stock is listed and
                # counted once per entry, not once per matching alias.
                hits = sum(text.count(alias.upper()) for alias in words.split())
                if hits > 0:
                    word_count[words] += 1
                    topic_count[topic] += 1
                    hot_stocks_map[words].append((stock_label, hits))
                    is_hot = True
        if not is_hot:
            hot_stocks_map["其他"].append((stock_label, 0))

    sorted_topics = sorted(topic_count.items(), key=lambda item: item[1], reverse=True)
    sorted_words = sorted(word_count.items(), key=lambda item: item[1], reverse=True)

    result = []
    for topic, stocks_in_topic in sorted_topics:
        topic_words = hot_words_config[topic]
        topic_words_stocks = [
            (f"{words}({stocks_for_words})", sorted(hot_stocks_map[words], key=lambda item: item[1], reverse=True))
            for (words, stocks_for_words) in sorted_words
            if words in topic_words
        ]
        result.append((f"{topic}({stocks_in_topic})", topic_words_stocks))

    result.append(("其他", [("其他", hot_stocks_map["其他"])]))

    return result


if __name__ == "__main__":
    # Demo run: group the entities returned by the top-performance query by
    # hot-word topic and print a plain-text report.
    top_ids = get_top_performance_entities_by_periods(entity_provider="em", data_provider="em")
    top_entities = get_entities(provider="em", entity_type="stock", entity_ids=top_ids, return_type="domain")

    report_parts = []
    for topic_label, word_groups in group_stocks_by_topic(entities=top_entities):
        report_parts.append(f"^^^^^^ {topic_label} ^^^^^^\n")
        for word_label, stock_hits in word_groups:
            report_parts.append(f"{word_label}\n")
            # One "<stock> <hits>" line per stock, followed by a newline.
            stock_lines = "\n".join(f"{stock} {hits}" for stock, hits in stock_hits)
            report_parts.append(stock_lines + "\n")

    print("".join(report_parts))
Loading

0 comments on commit 03aee86

Please sign in to comment.