Skip to content

Commit

Permalink
V0.9.41 更新一批代码 (waditu#185)
Browse files Browse the repository at this point in the history
* 0.9.41 新增 disk cache 功能

* 0.9.41 update

* update

* 0.9.41 fix test

* update readme

* 0.9.41 优化 streamlit 组件

* 0.9.41 新增 weekly 绩效评价

* 0.9.41 fix test

* 0.9.41 新增自相关分析组件

* 0.9.41 update

* 0.9.41 update

* 0.9.41 fix test

* 0.9.41 优化 streamlit 组件
  • Loading branch information
zengbin93 authored Jan 21, 2024
1 parent 912aeb7 commit f7ba018
Show file tree
Hide file tree
Showing 15 changed files with 558 additions and 48 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ name: Python package

on:
push:
branches: [ master, V0.9.40 ]
branches: [ master, V0.9.41 ]
pull_request:
branches: [ master ]

Expand Down
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
* 已经开始用czsc库进行量化研究的朋友,欢迎[加入飞书群](https://applink.feishu.cn/client/chat/chatter/add_by_link?link_token=0bak668e-7617-452c-b935-94d2c209e6cf),快点击加入吧!
* [B站视频教程合集(持续更新...)](https://space.bilibili.com/243682308/channel/series)
* [CZSC策略圈介绍](https://s0cqcxuy3p.feishu.cn/wiki/D12bwh4SriW1Lgk23HUchFKFnpe)
* [CZSC小圈子](https://s0cqcxuy3p.feishu.cn/wiki/wikcnwXSk9mWnki1b6URPhLA2Hc)
* [CZSC代码库QA](https://zbczsc.streamlit.app/)


Expand All @@ -33,7 +33,6 @@
* 基于 Tushare 数据的择时、选股策略回测研究流程



## 安装使用

**注意:** python 版本必须大于等于 3.7
Expand All @@ -43,6 +42,11 @@
pip install git+git@github.com:waditu/czsc.git -U
```

直接从github指定分支安装最新版:
```
pip install git+https://github.com/waditu/czsc.git@V0.9.41 -U
```

`pypi`安装:
```
pip install czsc -U -i https://pypi.python.org/simple
Expand Down
11 changes: 9 additions & 2 deletions czsc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,13 @@
SignalAnalyzer,
SignalPerformance,
daily_performance,
weekly_performance,
net_value_stats,
subtract_fee,

home_path,
DiskCache,
disk_cache,
get_dir_size,
empty_cache_path,
print_df_sample,
Expand All @@ -94,13 +98,16 @@
# streamlit 量化分析组件
from czsc.utils.st_components import (
show_daily_return,
show_splited_daily,
show_monthly_return,
show_correlation,
show_sectional_ic,
show_factor_returns,
show_factor_layering,
show_symbol_factor_layering,
show_weight_backtest,
show_ts_rolling_corr,
show_ts_self_corr,
)

from czsc.utils.bi_info import (
Expand All @@ -119,10 +126,10 @@
find_most_similarity,
)

__version__ = "0.9.40"
__version__ = "0.9.41"
__author__ = "zengbin93"
__email__ = "[email protected]"
__date__ = "20231218"
__date__ = "20240114"


def welcome():
Expand Down
8 changes: 5 additions & 3 deletions czsc/connectors/cooperation.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,13 @@ def get_raw_bars(symbol, freq, sdt, edt, fq='前复权', **kwargs):
raise ValueError(f"symbol {symbol} 无法识别,获取数据失败!")


def stocks_daily_klines(years=None, **kwargs):
@czsc.disk_cache(path=cache_path, ttl=-1)
def stocks_daily_klines(sdt='20170101', edt="20240101", **kwargs):
"""获取全市场A股的日线数据"""
adj = kwargs.get('adj', 'hfq')
if years is None:
years = ['2017', '2018', '2019', '2020', '2021', '2022', '2023']
sdt = pd.to_datetime(sdt).year
edt = pd.to_datetime(edt).year
years = [str(year) for year in range(sdt, edt + 1)]

res = []
for year in years:
Expand Down
4 changes: 2 additions & 2 deletions czsc/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
from .plotly_plot import KlineChart
from .trade import cal_trade_price, update_nbars, update_bbars, update_tbars, risk_free_returns, resample_to_daily
from .cross import CrossSectionalPerformance, cross_sectional_ranker
from .stats import daily_performance, net_value_stats, subtract_fee
from .stats import daily_performance, net_value_stats, subtract_fee, weekly_performance
from .signal_analyzer import SignalAnalyzer, SignalPerformance
from .cache import home_path, get_dir_size, empty_cache_path
from .cache import home_path, get_dir_size, empty_cache_path, DiskCache, disk_cache
from .index_composition import index_composition
from .data_client import DataClient, set_url_token, get_url_token
from .oss import AliyunOSS
Expand Down
149 changes: 147 additions & 2 deletions czsc/utils/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,19 @@
"""

import os
import time
import dill
import shutil
import hashlib
import json
import pandas as pd
from pathlib import Path
from loguru import logger
from typing import Any

home_path = os.environ.get("CZSC_HOME", os.path.join(os.path.expanduser("~"), '.czsc'))
os.makedirs(home_path, exist_ok=True)

home_path = Path(os.environ.get("CZSC_HOME", os.path.join(os.path.expanduser("~"), '.czsc')))
home_path.mkdir(parents=True, exist_ok=True)


def get_dir_size(path):
Expand All @@ -28,3 +37,139 @@ def empty_cache_path():
shutil.rmtree(home_path)
os.makedirs(home_path, exist_ok=False)
print(f"已清空缓存文件夹:{home_path}")


class DiskCache:
    """Simple file-backed cache: every entry lives in ``<path>/<k>.<suffix>``.

    The suffix selects the serialization format:
    ``pkl`` (dill), ``json``, ``txt``, ``csv`` or ``xlsx``.
    """

    def __init__(self, path=None):
        """Create the cache directory if needed.

        :param path: cache directory; defaults to ``home_path / "disk_cache"``
        :raises Exception: if ``path`` points at an existing regular file
        """
        self.path = home_path / "disk_cache" if path is None else Path(path)
        if self.path.is_file():
            raise Exception("path has exist")

        self.path.mkdir(parents=True, exist_ok=True)

    def __str__(self) -> str:
        return "DiskCache: " + str(self.path)

    def is_found(self, k: str, suffix: str = "pkl", ttl=-1) -> bool:
        """Check whether a usable cache file exists.

        :param k: cache file name (without suffix)
        :param suffix: cache file suffix, one of pkl, json, txt, csv, xlsx
        :param ttl: lifetime of the cache file in seconds; values <= 0
            (e.g. the default -1) mean the file never expires
        :return: True if the file exists and has not expired
        """
        file = self.path / f"{k}.{suffix}"
        if not file.exists():
            logger.info(f"文件不存在, {file}")
            return False

        if ttl > 0:
            # Age is measured from the last write (st_mtime). st_ctime is the
            # creation time on Windows and is not refreshed when ``set``
            # overwrites an entry, which would make a refreshed entry look
            # expired forever.
            written_at = file.stat().st_mtime
            if (time.time() - written_at) > ttl:
                logger.info(f"缓存文件已过期, {file}")
                return False

        return True

    def get(self, k: str, suffix: str = "pkl") -> Any:
        """Read a cache file.

        :param k: cache file name (without suffix)
        :param suffix: cache file suffix, one of pkl, json, txt, csv, xlsx
        :return: the cached content, or None if the file does not exist
        :raises ValueError: if ``suffix`` is not supported
        """
        file = self.path / f"{k}.{suffix}"
        logger.info(f"正在读取缓存记录,地址:{file}")
        if not file.exists():
            logger.warning(f"文件不存在, {file}")
            return None

        if suffix == "pkl":
            # NOTE(review): dill/pickle can execute arbitrary code on load;
            # only point the cache at directories you trust.
            with open(file, 'rb') as f:
                res = dill.load(f)
        elif suffix == "json":
            with open(file, 'r', encoding='utf-8') as f:
                res = json.load(f)
        elif suffix == "txt":
            res = file.read_text(encoding='utf-8')
        elif suffix == "csv":
            res = pd.read_csv(file, encoding='utf-8')
        elif suffix == "xlsx":
            res = pd.read_excel(file)
        else:
            raise ValueError(f"suffix {suffix} not supported")
        return res

    def set(self, k: str, v: Any, suffix: str = "pkl"):
        """Write a cache file, overwriting any existing entry.

        :param k: cache file name (without suffix)
        :param v: content to cache; must be a dict for json, a str for txt
            and a pd.DataFrame for csv / xlsx
        :param suffix: cache file suffix, one of pkl, json, txt, csv, xlsx
        :raises ValueError: if ``v`` has the wrong type for ``suffix`` or
            ``suffix`` is not supported
        """
        file = self.path / f"{k}.{suffix}"
        if file.exists():
            logger.info(f"缓存文件 {file} 将被覆盖")

        if suffix == "pkl":
            # ``with`` guarantees the handle is flushed and closed even if
            # serialization fails part-way through.
            with open(file, 'wb') as f:
                dill.dump(v, f)

        elif suffix == "json":
            if not isinstance(v, dict):
                raise ValueError("suffix json only support dict")
            with open(file, 'w', encoding='utf-8') as f:
                json.dump(v, f, ensure_ascii=False, indent=4)

        elif suffix == "txt":
            if not isinstance(v, str):
                raise ValueError("suffix txt only support str")
            file.write_text(v, encoding='utf-8')

        elif suffix == "csv":
            if not isinstance(v, pd.DataFrame):
                raise ValueError("suffix csv only support pd.DataFrame")
            v.to_csv(file, index=False, encoding='utf-8')

        elif suffix == 'xlsx':
            if not isinstance(v, pd.DataFrame):
                raise ValueError("suffix xlsx only support pd.DataFrame")
            v.to_excel(file, index=False)

        else:
            raise ValueError(f"suffix {suffix} not supported")

        logger.info(f"已写入缓存文件:{file}")

    def remove(self, k: str, suffix: str = "pkl"):
        """Delete a cache file if it exists; a missing file is not an error.

        :param k: cache file name (without suffix)
        :param suffix: cache file suffix
        """
        file = self.path / f"{k}.{suffix}"
        logger.info(f"准备删除缓存文件:{file}")
        if file.exists():
            file.unlink()


def disk_cache(path: str, suffix: str = "pkl", ttl: int = -1):
    """Decorator that caches a function's return value on disk.

    Results are stored through a ``DiskCache`` rooted at
    ``<path>/<function name>``. The cache key is the first 8 hex characters
    of the MD5 of ``f"{func.__name__}{args}{kwargs}"``, so it depends on the
    ``repr`` of the arguments and on the order in which keyword arguments
    are passed.

    :param path: cache directory path
    :param suffix: cache file suffix, one of pkl, json, txt, csv, xlsx
    :param ttl: lifetime of a cache file in seconds; values <= 0 never expire
    :return: a decorator wrapping the target function with disk caching
    """
    # Local import keeps this block self-contained without touching the
    # module-level import section.
    from functools import wraps

    assert suffix in ["pkl", "json", "txt", "csv", "xlsx"], "suffix not supported"

    def decorator(func):
        _c = DiskCache(path=Path(path) / func.__name__)

        # wraps() preserves __name__ / __doc__ of the decorated function, so
        # introspection and any further decorator that relies on
        # func.__name__ keep seeing the real function.
        @wraps(func)
        def cached_func(*args, **kwargs):
            hash_str = f"{func.__name__}{args}{kwargs}"
            k = hashlib.md5(hash_str.encode('utf-8')).hexdigest().upper()[:8]
            k = f"{k}_{func.__name__}"

            if _c.is_found(k, suffix=suffix, ttl=ttl):
                return _c.get(k, suffix=suffix)

            # Cache miss (or expired entry): compute, persist, then return.
            output = func(*args, **kwargs)
            _c.set(k, output, suffix=suffix)
            return output

        return cached_func

    return decorator
Loading

0 comments on commit f7ba018

Please sign in to comment.