diff --git a/examples/data_runner/kdata_runner.py b/examples/data_runner/kdata_runner.py index 41439517..00f3228e 100644 --- a/examples/data_runner/kdata_runner.py +++ b/examples/data_runner/kdata_runner.py @@ -6,6 +6,7 @@ from examples.recorder_utils import run_data_recorder from examples.report_utils import inform from zvt import init_log +from zvt.api.selector import get_entity_ids_by_filter from zvt.domain import ( Stock, Stock1dHfqKdata, @@ -16,6 +17,7 @@ BlockCategory, Index, Index1dKdata, + StockNews, ) from zvt.informer import EmailInformer from zvt.utils import next_date, current_date @@ -25,6 +27,22 @@ sched = BackgroundScheduler() +@sched.scheduled_job("cron", hour=16, minute=30, day_of_week="mon-fri") +def record_stock_news(data_provider="em"): + normal_stock_ids = get_entity_ids_by_filter( + provider="em", ignore_delist=True, ignore_st=False, ignore_new_stock=False + ) + + run_data_recorder( + entity_ids=normal_stock_ids, + day_data=True, + domain=StockNews, + data_provider=data_provider, + force_update=False, + sleeping_time=2, + ) + + @sched.scheduled_job("cron", hour=15, minute=30, day_of_week="mon-fri") def record_stock_data(data_provider="em", entity_provider="em", sleeping_time=2): # A股指数 @@ -38,17 +56,6 @@ def record_stock_data(data_provider="em", entity_provider="em", sleeping_time=2) sleeping_time=sleeping_time, ) - # A股标的 - run_data_recorder(domain=Stock, data_provider=data_provider, force_update=False) - # A股后复权行情 - run_data_recorder( - domain=Stock1dHfqKdata, - data_provider=data_provider, - entity_provider=entity_provider, - day_data=True, - sleeping_time=sleeping_time, - ) - # 板块(概念,行业) run_data_recorder(domain=Block, entity_provider=entity_provider, data_provider=entity_provider, force_update=False) # 板块行情(概念,行业) @@ -76,8 +83,24 @@ def record_stock_data(data_provider="em", entity_provider="em", sleeping_time=2) title="report 新概念", entity_provider=entity_provider, entity_type="block", - em_group="关注板块", - em_group_over_write=True, + em_group="练气", + em_group_over_write=False, + ) + + # A股标的 + run_data_recorder(domain=Stock, data_provider=data_provider, force_update=False) + # A股后复权行情 + normal_stock_ids = get_entity_ids_by_filter( + provider="em", ignore_delist=True, ignore_st=False, ignore_new_stock=False + ) + + run_data_recorder( + entity_ids=normal_stock_ids, + domain=Stock1dHfqKdata, + data_provider=data_provider, + entity_provider=entity_provider, + day_data=True, + sleeping_time=sleeping_time, ) diff --git a/examples/hot.json b/examples/hot.json new file mode 100644 index 00000000..d3427dbf --- /dev/null +++ b/examples/hot.json @@ -0,0 +1,44 @@ +{ + "新能源": [ + "新能源", + "锂电 锂电池", + "钠离子电池", + "光伏", + "太阳能", + "储能", + "TOPCON电池", + "风电", + "核电" + ], + "新能车": [ + "新能车 新能源汽车", + "整车 汽车整车", + "汽车零部件 汽车零件", + "无人驾驶", + "压铸一体化 一体化压铸" + ], + "人工智能": [ + "人工智能 AI", + "GPT CHATGPT", + "算力" + ], + "机器人": [ + "机器人", + "减速器", + "伺服 伺服系统", + "控制系统", + "电机" + ], + "核心资产": [ + "核心资产", + "白马", + "沪深300", + "基金重仓", + "上证50" + ], + "人民币国际化": [ + "人民币国际化", + "一带一路", + "跨境支付" + ] +} diff --git a/examples/reports/report_tops.py b/examples/reports/report_tops.py index 4e54a92d..604572c7 100644 --- a/examples/reports/report_tops.py +++ b/examples/reports/report_tops.py @@ -23,11 +23,11 @@ def report_top_stocks(): entity_type="stock", entity_provider="em", data_provider="em", - periods=[*range(2, 21)], + periods=[*range(2, 27)], ignore_new_stock=True, ignore_st=True, adjust_type=None, - top_count=20, + top_count=25, turnover_threshold=0, turnover_rate_threshold=0, informer=email_informer, @@ -40,7 +40,7 @@ def report_top_stocks(): entity_type="stock", entity_provider="em", data_provider="em", - periods=[*range(21, 60)], + periods=[*range(27, 67)], ignore_new_stock=True, ignore_st=True, adjust_type=None, @@ -123,7 +123,7 @@ def report_top_stockhks(): entity_provider="em", data_provider="em", top_count=10, - periods=[*range(2, 10)], + periods=[*range(2, 27)], ignore_new_stock=False, ignore_st=False, adjust_type=None, @@ -152,22 +152,22 @@ def report_top_stockhks(): return_type=TopType.positive, ) - report_top_entities( - entity_type="stockhk", - entity_provider="em", - data_provider="em", - top_count=20, - periods=[365, 750], - ignore_new_stock=True, - ignore_st=False, - adjust_type=None, - turnover_threshold=50000000, - turnover_rate_threshold=0.005, - informer=email_informer, - em_group="谁有我惨", - em_group_over_write=False, - return_type=TopType.negative, - ) + # report_top_entities( + # entity_type="stockhk", + # entity_provider="em", + # data_provider="em", + # top_count=20, + # periods=[365, 750], + # ignore_new_stock=True, + # ignore_st=False, + # adjust_type=None, + # turnover_threshold=50000000, + # turnover_rate_threshold=0.005, + # informer=email_informer, + # em_group="谁有我惨", + # em_group_over_write=False, + # return_type=TopType.negative, + # ) if __name__ == "__main__": @@ -175,7 +175,7 @@ def report_top_stockhks(): report_top_stocks() report_top_blocks() - # report_top_stockhks() + report_top_stockhks() sched.start() diff --git a/examples/reports/report_vol_up.py b/examples/reports/report_vol_up.py index 06738891..c27c6cd1 100644 --- a/examples/reports/report_vol_up.py +++ b/examples/reports/report_vol_up.py @@ -19,7 +19,7 @@ @sched.scheduled_job("cron", hour=17, minute=0, day_of_week="mon-fri") -def report_vol_up(): +def report_vol_up_stocks(): target_date = get_latest_kdata_date(entity_type="stock", adjust_type=AdjustType.hfq, provider="em") entity_ids = get_mini_and_small_stock(timestamp=target_date, provider="em") @@ -34,7 +34,7 @@ def report_vol_up(): em_group_over_write=True, filter_by_volume=False, adjust_type=AdjustType.hfq, - start_timestamp="2019-01-01", + start_timestamp="2021-01-01", # factor args windows=[120, 250], over_mode="or", @@ -56,7 +56,7 @@ def report_vol_up(): em_group_over_write=False, filter_by_volume=False, adjust_type=AdjustType.hfq, - start_timestamp="2019-01-01", + start_timestamp="2021-01-01", # factor args windows=[120, 250], over_mode="or", @@ -66,6 +66,9 @@ def report_vol_up(): entity_ids=entity_ids, ) + +@sched.scheduled_job("cron", hour=17, minute=30, day_of_week="mon-fri") +def report_vol_up_stockhks(): report_targets( factor_cls=VolumeUpMaFactor, entity_provider="em", @@ -77,11 +80,11 @@ def report_vol_up(): em_group_over_write=False, filter_by_volume=False, adjust_type=AdjustType.hfq, - start_timestamp="2019-01-01", + start_timestamp="2021-01-01", # factor args windows=[120, 250], over_mode="or", - up_intervals=20, + up_intervals=60, turnover_threshold=100000000, turnover_rate_threshold=0.01, ) @@ -90,8 +93,8 @@ def report_vol_up(): if __name__ == "__main__": init_log("report_vol_up.log") - report_vol_up() - + report_vol_up_stocks() + report_vol_up_stockhks() sched.start() sched._thread.join() diff --git a/examples/utils.py b/examples/utils.py index a855211d..c58748e3 100644 --- a/examples/utils.py +++ b/examples/utils.py @@ -1,7 +1,16 @@ # -*- coding: utf-8 -*- +import json import logging +import os +import pprint import eastmoneypy +import pandas as pd + +from zvt.api.stats import get_top_performance_entities_by_periods +from zvt.contract.api import get_entities +from zvt.domain import StockNews +from zvt.utils import next_date, today logger = logging.getLogger(__name__) @@ -19,3 +28,99 @@ def add_to_eastmoney(codes, group, entity_type="stock", over_write=True): for code in codes: eastmoneypy.add_to_group(code=code, entity_type=entity_type, group_name=group) + + +def get_hot_words_config(): + with open(os.path.join(os.path.dirname(__file__), "hot.json")) as f: + return json.load(f) + + +def count_hot_words(text: str): + text = text.upper() + hot_words_config = get_hot_words_config() + word_stats = {} + topic_stats = {} + for topic in hot_words_config: + topic_count = 0 + for word in hot_words_config[topic]: + word_stats[word] = text.count(word) + topic_count = topic_count + word_stats[word] + topic_stats[topic] = topic_count + return topic_stats, word_stats + + +def hot_stats(data: pd.Series): + pass + + +def group_stocks_by_topic(entities, start_timestamp=None): + if not start_timestamp: + start_timestamp = next_date(today(), -180) + stock_map = {} + for entity in entities: + stock_map[entity.entity_id] = {"code": entity.code, "name": entity.name} + df = StockNews.query_data(start_timestamp=start_timestamp, entity_ids=[entity.entity_id for entity in entities]) + df = df.groupby("entity_id")["news_title"].apply(",".join).reset_index() + + hot_words_config = get_hot_words_config() + + hot_stocks_map = {} + topic_count = {} + word_count = {} + for _, row in df[["entity_id", "news_title"]].iterrows(): + entity_id = row["entity_id"] + text = row["news_title"] + + is_hot = False + for topic in hot_words_config: + topic_count.setdefault(topic, 0) + for words in hot_words_config[topic]: + hot_stocks_map.setdefault(words, []) + word_count.setdefault(words, 0) + for word in words.split(): + count = text.count(word) + if count > 0: + word_count[words] = word_count[words] + 1 + topic_count[topic] = topic_count[topic] + 1 + hot_stocks_map[words].append( + (f"{stock_map[entity_id]['code']}({stock_map[entity_id]['name']})", count) + ) + is_hot = True + if not is_hot: + hot_stocks_map.setdefault("其他", []) + hot_stocks_map["其他"].append((f"{stock_map[entity_id]['code']}({stock_map[entity_id]['name']})", 0)) + + sorted_topics = sorted(topic_count.items(), key=lambda item: item[1], reverse=True) + sorted_words = sorted(word_count.items(), key=lambda item: item[1], reverse=True) + + result = [] + for topic, count in sorted_topics: + topic_words = hot_words_config[topic] + topic_words_stocks = [ + (f"{words}({count})", sorted(hot_stocks_map[words], key=lambda item: item[1], reverse=True)) + for (words, count) in sorted_words + if words in topic_words + ] + result.append((f"{topic}({count})", topic_words_stocks)) + + result.append(("其他", [("其他", hot_stocks_map["其他"])])) + + return result + + +if __name__ == "__main__": + ids = get_top_performance_entities_by_periods(entity_provider="em", data_provider="em") + + entities = get_entities(provider="em", entity_type="stock", entity_ids=ids, return_type="domain") + + group_info = group_stocks_by_topic(entities=entities) + info = "" + for group in group_info: + topic = group[0] + info = info + f"^^^^^^ {topic} ^^^^^^\n" + for topic_word, stocks_count in group[1]: + info = info + f"{topic_word}\n" + stocks = [f"{stock_count[0]} {stock_count[1]}" for stock_count in stocks_count] + info = info + "\n".join(stocks) + "\n" + + print(info) diff --git a/src/zvt/api/selector.py b/src/zvt/api/selector.py index 01afe497..89d938e1 100644 --- a/src/zvt/api/selector.py +++ b/src/zvt/api/selector.py @@ -26,7 +26,13 @@ def get_entity_ids_by_filter( - provider="em", ignore_st=True, ignore_new_stock=True, target_date=None, entity_schema=Stock, entity_ids=None + provider="em", + ignore_delist=True, + ignore_st=True, + ignore_new_stock=True, + target_date=None, + entity_schema=Stock, + entity_ids=None, ): filters = [] if ignore_new_stock: @@ -34,12 +40,17 @@ def get_entity_ids_by_filter( target_date = current_date() pre_year = next_date(target_date, -365) filters += [entity_schema.timestamp <= pre_year] - if ignore_st: + if ignore_delist: filters += [ entity_schema.name.not_like("%退%"), + ] + + if ignore_st: + filters += [ entity_schema.name.not_like("%ST%"), entity_schema.name.not_like("%*ST%"), ] + return get_entity_ids(provider=provider, entity_schema=entity_schema, filters=filters, entity_ids=entity_ids) @@ -279,8 +290,8 @@ def get_middle_and_big_stock(timestamp, provider="em"): # mini = get_mini_cap_stock(timestamp=target_date) # print(len(mini)) # print(mini) - df = get_player_performance(start_timestamp="2022-01-01") - print(df) + # df = get_player_performance(start_timestamp="2022-01-01") + print(len(get_entity_ids_by_filter())) # the __all__ is generated __all__ = [ "get_dragon_and_tigger_player", diff --git a/src/zvt/contract/api.py b/src/zvt/contract/api.py index b33908f2..24842c2b 100644 --- a/src/zvt/contract/api.py +++ b/src/zvt/contract/api.py @@ -506,8 +506,9 @@ def df_to_db( for step in range(step_size): df_current = df.iloc[sub_size * step : sub_size * (step + 1)] + + session = get_db_session(provider=provider, data_schema=data_schema) if force_update: - session = get_db_session(provider=provider, data_schema=data_schema) ids = df_current["id"].tolist() if len(ids) == 1: sql = f'delete from `{data_schema.__tablename__}` where id = "{ids[0]}"' @@ -519,10 +520,15 @@ def df_to_db( else: current = get_data( - data_schema=data_schema, columns=[data_schema.id], provider=provider, ids=df_current["id"].tolist() + session=session, + data_schema=data_schema, + columns=[data_schema.id], + provider=provider, + ids=df_current["id"].tolist(), ) if pd_is_not_null(current): df_current = df_current[~df_current["id"].isin(current["id"])] + session.commit() if pd_is_not_null(df_current): saved = saved + len(df_current) diff --git a/src/zvt/recorders/em/em_api.py b/src/zvt/recorders/em/em_api.py index 6287b1f2..645bdf3b 100644 --- a/src/zvt/recorders/em/em_api.py +++ b/src/zvt/recorders/em/em_api.py @@ -540,7 +540,7 @@ def get_tradable_list( return pd.concat(dfs) -def get_news(entity_id, ps=200, index=1): +def get_news(entity_id, ps=200, index=1, start_timestamp=None): sec_id = to_em_sec_id(entity_id=entity_id) url = f"https://np-listapi.eastmoney.com/comm/wap/getListInfo?cb=callback&client=wap&type=1&mTypeAndCode={sec_id}&pageSize={ps}&pageIndex={index}&callback=jQuery1830017478247906740352_{now_timestamp() - 1}&_={now_timestamp()}" resp = requests.get(url) @@ -561,7 +561,7 @@ def get_news(entity_id, ps=200, index=1): json_result = demjson3.decode(json_text)["data"]["list"] resp.close() if json_result: - json_result = [ + news = [ { "id": f'{entity_id}_{item["Art_ShowTime"]}', "entity_id": entity_id, @@ -569,12 +569,15 @@ def get_news(entity_id, ps=200, index=1): "news_title": item["Art_Title"], } for item in json_result + if not start_timestamp or (to_pd_timestamp(item["Art_ShowTime"]) >= start_timestamp) ] + if len(news) < len(json_result): + return news next_data = get_news(entity_id=entity_id, ps=ps, index=index + 1) if next_data: - return json_result + next_data + return news + next_data else: - return json_result + return news # utils to transform zvt entity to em entity @@ -721,7 +724,11 @@ def to_zvt_code(code): # df = get_kdata(entity_id="future_dce_I", level="1d") # print(df) # df = get_dragon_and_tiger(code="000989", start_date="2018-10-31") - df = get_dragon_and_tiger_list(start_date="2022-04-25") + # df = get_dragon_and_tiger_list(start_date="2022-04-25") + df = get_tradable_list() + df_delist = df[df["name"].str.contains("退")] + print(df_delist[["id", "name"]].values.tolist()) + print(df) # the __all__ is generated __all__ = [ diff --git a/src/zvt/recorders/em/meta/em_stock_meta_recorder.py b/src/zvt/recorders/em/meta/em_stock_meta_recorder.py index b46cf48a..acba5a7e 100644 --- a/src/zvt/recorders/em/meta/em_stock_meta_recorder.py +++ b/src/zvt/recorders/em/meta/em_stock_meta_recorder.py @@ -5,6 +5,7 @@ from zvt.contract.recorder import Recorder from zvt.domain import Stock from zvt.recorders.em import em_api +from zvt.utils import pd_is_not_null class EMStockRecorder(Recorder): @@ -14,6 +15,14 @@ class EMStockRecorder(Recorder): def run(self): for exchange in [Exchange.sh, Exchange.sz]: df = em_api.get_tradable_list(entity_type="stock", exchange=exchange) + # df_delist = df[df["name"].str.contains("退")] + if pd_is_not_null(df): + for item in df[["id", "name"]].values.tolist(): + id = item[0] + name = item[1] + sql = f'update stock set name = "{name}" where id = "{id}"' + self.session.execute(sql) + self.session.commit() self.logger.info(df) df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update) diff --git a/src/zvt/recorders/em/news/em_stock_news_recorder.py b/src/zvt/recorders/em/news/em_stock_news_recorder.py index 4346126b..52025341 100644 --- a/src/zvt/recorders/em/news/em_stock_news_recorder.py +++ b/src/zvt/recorders/em/news/em_stock_news_recorder.py @@ -6,6 +6,7 @@ from zvt.domain import Stock from zvt.domain.misc.stock_news import StockNews from zvt.recorders.em import em_api +from zvt.utils import to_pd_timestamp, count_interval, now_pd_timestamp class EMStockNewsRecorder(FixedCycleDataRecorder): @@ -18,15 +19,22 @@ class EMStockNewsRecorder(FixedCycleDataRecorder): provider = "em" def record(self, entity, start, end, size, timestamps): - news = em_api.get_news(entity_id=entity.id) - df = pd.DataFrame.from_records(news) - self.logger.info(df) - df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update) + if not start or (start <= to_pd_timestamp("2018-01-01")): + start = to_pd_timestamp("2018-01-01") + if count_interval(start, now_pd_timestamp()) <= 30: + ps = 30 + else: + ps = 200 + news = em_api.get_news(entity_id=entity.id, ps=ps, start_timestamp=start) + if news: + df = pd.DataFrame.from_records(news) + self.logger.info(df) + df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update) if __name__ == "__main__": # Stock.record_data(provider="em") - r = EMStockNewsRecorder(entity_ids=["stock_sh_688256"]) + r = EMStockNewsRecorder(entity_ids=["stock_sz_000005"]) r.run() # the __all__ is generated __all__ = ["EMStockNewsRecorder"]