Skip to content

Commit

Permalink
V0.9.39 更新一批代码 (waditu#181)
Browse files Browse the repository at this point in the history
* 0.9.39 新增两个 rolling 工具函数

* 0.9.39 加入2024交易日历

* 0.9.39 update

* 0.9.39 新增特征计算相关工具函数

* 0.9.39 update
  • Loading branch information
zengbin93 authored Dec 17, 2023
1 parent 8950731 commit c55a00b
Show file tree
Hide file tree
Showing 11 changed files with 233 additions and 10 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ name: Python package

on:
push:
branches: [ master, V0.9.38 ]
branches: [ master, V0.9.39 ]
pull_request:
branches: [ master ]

Expand Down
8 changes: 6 additions & 2 deletions czsc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,16 @@
normalize_feature,
normalize_ts_feature,
feture_cross_layering,
rolling_rank,
rolling_norm,
rolling_qcut,
find_most_similarity,
)

__version__ = "0.9.38"
__version__ = "0.9.39"
__author__ = "zengbin93"
__email__ = "[email protected]"
__date__ = "20231126"
__date__ = "20231212"


def welcome():
Expand Down
2 changes: 2 additions & 0 deletions czsc/connectors/cooperation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
email: [email protected]
create_dt: 2023/11/15 20:45
describe: CZSC开源协作团队内部使用数据接口
接口说明:https://s0cqcxuy3p.feishu.cn/wiki/F3HGw9vDPisWtSkJr1ac5DEcnNh
"""
import os
import czsc
Expand Down
3 changes: 2 additions & 1 deletion czsc/traders/weight_backtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,8 @@ def __init__(self, dfw, digits=2, **kwargs) -> None:
self.fee_rate = kwargs.get('fee_rate', 0.0002)
self.dfw['weight'] = self.dfw['weight'].astype('float').round(digits)
self.symbols = list(self.dfw['symbol'].unique().tolist())
self.results = self.backtest(n_jobs=kwargs.get('n_jobs', int(cpu_count() / 2)))
default_n_jobs = min(cpu_count() // 2, len(self.symbols))
self.results = self.backtest(n_jobs=kwargs.get('n_jobs', default_n_jobs))

def get_symbol_daily(self, symbol):
"""获取某个合约的每日收益率
Expand Down
9 changes: 9 additions & 0 deletions czsc/utils/calendar.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,15 @@
calendar = pd.read_feather(Path(__file__).parent / "china_calendar.feather")


def prepare_chain_calendar():
    """Regenerate the ``china_calendar.feather`` file stored next to this module.

    Queries tushare's ``trade_cal`` endpoint for 2010-01-01 through 2030-12-31,
    converts ``cal_date`` to datetime, sorts by date, keeps only the
    ``cal_date`` and ``is_open`` columns and overwrites the feather file.

    NOTE(review): ``ts.pro_api()`` is called without a token, so this presumably
    relies on a tushare token that was configured beforehand -- confirm.
    """
    import tushare as ts

    target = Path(__file__).parent / "china_calendar.feather"

    client = ts.pro_api()
    cal = client.trade_cal(exchange='', start_date='20100101', end_date='20301231')
    cal['cal_date'] = pd.to_datetime(cal['cal_date'])

    # Feather needs a default RangeIndex, hence the reset after sorting.
    cal = cal.sort_values('cal_date')
    cal = cal.reset_index(drop=True)
    cal = cal[['cal_date', 'is_open']]
    cal.to_feather(target)


def is_trading_date(date=datetime.now()):
"""判断是否是交易日"""
date = pd.to_datetime(pd.to_datetime(date).date())
Expand Down
Binary file modified czsc/utils/china_calendar.feather
Binary file not shown.
4 changes: 4 additions & 0 deletions czsc/utils/data_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ def get_url_token(url):
if file_token.exists():
return open(file_token, 'r', encoding='utf-8').read()
logger.warning(f"请设置 {url} 的访问凭证码,如果没有请联系管理员申请")
token = input(f"请输入 {url} 的访问凭证码(token):")
if token:
set_url_token(token, url)
return token
return None


Expand Down
109 changes: 109 additions & 0 deletions czsc/utils/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,112 @@ def _layering(x):
df[f"{x_col}分层"] = df[f"{x_col}分层"].fillna(-1)
df[f'{x_col}分层'] = df[f'{x_col}分层'].apply(lambda x: f'第{str(int(x+1)).zfill(2)}层')
return df


def rolling_rank(df: pd.DataFrame, col, n=None, new_col=None, **kwargs):
    """Add the rolling (or expanding) rank of ``df[col]`` to ``df`` in place.

    :param df: pd.DataFrame
        Data to work on; it is modified in place and nothing is returned.
    :param col: str
        Name of the column to rank.
    :param n: int
        Rolling window size. ``None`` (the default) ranks over an expanding
        window instead of a fixed-size rolling window.
    :param new_col: str
        Name of the output column; a falsy value means ``f'{col}_rank'``.
    :param kwargs:
        min_periods: int
            Minimum number of observations required in a window, default 2.
            Rows with fewer observations get rank 0.
    """
    target = new_col or f'{col}_rank'
    min_periods = kwargs.get('min_periods', 2)

    values = df[col]
    if n is None:
        windowed = values.expanding(min_periods=min_periods)
    else:
        windowed = values.rolling(window=n, min_periods=min_periods)

    # Windows that are still too short produce NaN; report those as 0.
    df[target] = windowed.rank().fillna(0)


def rolling_norm(df: pd.DataFrame, col, n=None, new_col=None, **kwargs):
    """Add the rolling (or expanding) z-score of ``df[col]`` to ``df`` in place.

    For every window the latest value is standardised as
    ``(last - mean) / std``. The window is handed over as a raw numpy array,
    so ``std`` is numpy's default (population) standard deviation.

    :param df: pd.DataFrame
        Data to work on; it is modified in place and nothing is returned.
    :param col: str
        Name of the column to normalise.
    :param n: int
        Rolling window size. ``None`` (the default) uses an expanding window
        instead of a fixed-size rolling window.
    :param new_col: str
        Name of the output column; a falsy value means ``f'{col}_norm'``.
    :param kwargs:
        min_periods: int
            Minimum number of observations required in a window, default 2.
            Rows whose result is NaN are filled with 0.
    """
    target = new_col or f'{col}_norm'
    min_periods = kwargs.get('min_periods', 2)

    def _zscore_of_last(window):
        # `window` is a numpy array because apply() is called with raw=True.
        return (window[-1] - window.mean()) / window.std()

    values = df[col]
    if n is None:
        windowed = values.expanding(min_periods=min_periods)
    else:
        windowed = values.rolling(window=n, min_periods=min_periods)

    df[target] = windowed.apply(_zscore_of_last, raw=True).fillna(0)


def rolling_qcut(df: pd.DataFrame, col, n=None, new_col=None, **kwargs):
    """Add the rolling (or expanding) quantile bucket of ``df[col]`` to ``df`` in place.

    For every window the values are split with ``pd.qcut`` and the bucket
    index of the latest value is stored.

    :param df: pd.DataFrame
        Data to work on; it is modified in place and nothing is returned.
    :param col: str
        Name of the column to bucket.
    :param n: int
        Rolling window size. ``None`` (the default) uses an expanding window
        instead of a fixed-size rolling window.
    :param new_col: str
        Name of the output column; a falsy value means ``f'{col}_qcut'``.
    :param kwargs:
        - q: int, number of quantile buckets, default 10
        - min_periods: int, minimum number of observations required in a
          window, defaults to ``q``; rows whose result is NaN are filled with -1
    """
    q = kwargs.get('q', 10)
    min_periods = kwargs.get('min_periods', q)
    target = new_col or f'{col}_qcut'

    def _bucket_of_last(window):
        # labels=False yields integer bucket codes; duplicate bin edges are merged.
        codes = pd.qcut(window, q=q, labels=False, duplicates='drop')
        return codes[-1]

    values = df[col]
    if n is None:
        windowed = values.expanding(min_periods=min_periods)
    else:
        windowed = values.rolling(window=n, min_periods=min_periods)

    df[target] = windowed.apply(_bucket_of_last, raw=True).fillna(-1)


def find_most_similarity(vector: pd.Series, matrix: pd.DataFrame, n=10, metric='cosine', **kwargs):
    """Find the ``n`` columns of ``matrix`` that are most similar to ``vector``.

    Similarity is measured with ``sklearn.metrics.pairwise.pairwise_distances``,
    so the returned values are *distances*: the smaller the value, the more
    similar the column.

    :param vector: 1-D vector as a Series.
    :param matrix: 2-D DataFrame; every column is one candidate vector and the
        column name is that vector's label.
    :param n: int, number of most similar vectors to return.
    :param metric: str, distance metric passed to ``pairwise_distances``,
        - From scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',
            'manhattan']. These metrics support sparse matrix
            inputs.
            ['nan_euclidean'] but it does not yet support sparse matrices.
        - From scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',
            'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis',
            'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
            'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']
            See the documentation for scipy.spatial.distance for details on these
            metrics. These metrics do not support sparse matrix inputs.
    :param kwargs: accepted for backward compatibility; currently unused.
    :return: pd.Series indexed by column label holding the ``n`` smallest
        distances, sorted from most similar to least similar.
    """
    from sklearn.metrics.pairwise import pairwise_distances

    # Use the caller's `metric` directly. A named parameter never shows up in
    # **kwargs, so looking it up there always fell back to 'cosine'.
    query = vector.values.reshape(1, -1)
    dist = pairwise_distances(query, matrix.T, metric=metric).reshape(-1)
    dist = pd.Series(dist, index=matrix.columns)

    # Smaller distance means more similar, so sort ascending before cutting.
    return dist.sort_values(ascending=True).iloc[:n]
5 changes: 3 additions & 2 deletions czsc/utils/st_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,9 @@ def show_sectional_ic(df, x_col, y_col, method='pearson', **kwargs):
col4.dataframe(dfm.style.background_gradient(cmap='RdYlGn_r', axis=None).format('{:.4f}', na_rep='MISS'),
use_container_width=True)

fig = px.histogram(df, x=x_col, marginal="box", title="因子数据分布图")
st.plotly_chart(fig, use_container_width=True)
if kwargs.get("show_factor_histgram", False):
fig = px.histogram(df, x=x_col, marginal="box", title="因子数据分布图")
st.plotly_chart(fig, use_container_width=True)


def show_factor_returns(df, x_col, y_col):
Expand Down
14 changes: 10 additions & 4 deletions test/test_calendar.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,16 @@


def test_is_trading_date():
    """Check ``is_trading_date`` against dates with a known expected result."""
    # Stand-alone checks; the same four cases appear again in the table below.
    assert is_trading_date('2023-09-08') == True
    assert is_trading_date('2023-09-09') == False
    assert is_trading_date('2023-09-10') == False
    assert is_trading_date('2023-09-10 12:00') == False
    # (input date, expected result) pairs, including two dates in 2024.
    test_cases = [
        ('2023-09-08', True),
        ('2023-09-09', False),
        ('2023-09-10', False),
        ('2023-09-10 12:00', False),
        ('2024-04-18', True),
        ('2024-01-13', False)
    ]
    for date, expected_result in test_cases:
        assert is_trading_date(date) == expected_result


def test_next_trading_date():
Expand Down
87 changes: 87 additions & 0 deletions test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
create_dt: 2022/2/16 20:31
describe: czsc.utils 单元测试
"""
import sys
import pytest
import pandas as pd
import numpy as np
Expand Down Expand Up @@ -117,3 +118,89 @@ def test_daily_performance():
result = daily_performance([0.01, 0.02, -0.01, 0.03, 0.02, -0.02, 0.01, -0.01, 0.02, 0.01])
assert result == {'年化': 2.016, '夏普': 5, '最大回撤': 0.02, '卡玛': 10, '日胜率': 0.7,
'年化波动率': 0.2439, '非零覆盖': 1.0, '盈亏平衡点': 0.7, '最大新高时间': 4}


def test_find_most_similarity():
    """Smoke-test ``find_most_similarity`` on random non-negative data."""
    from czsc.utils.features import find_most_similarity

    # One random 10-dimensional query and 100 random candidate columns.
    query = pd.Series(np.random.rand(10))
    candidates = pd.DataFrame(np.random.rand(10, 100))

    top = find_most_similarity(query, candidates, n=5, metric='cosine')

    # The result is a Series with exactly the requested number of entries.
    assert isinstance(top, pd.Series)
    assert len(top) == 5

    # Labels are the integer column names of the candidate matrix.
    for label in top.index:
        assert isinstance(label, int)

    # Every reported value lies within [0, 1].
    for score in top.values:
        assert 0 <= score <= 1


def test_rolling_qcut():
    """Smoke-test ``rolling_qcut`` on 100 random values."""
    from czsc.utils.features import rolling_qcut

    frame = pd.DataFrame({
        'col1': np.random.rand(100),
    })

    rolling_qcut(frame, 'col1', n=10, new_col='col1_qcut', q=5, min_periods=5)

    # The function works in place, so the new column must now exist ...
    assert 'col1_qcut' in frame.columns

    # ... and be aligned with the source column.
    assert len(frame['col1_qcut']) == len(frame['col1'])

    # Bucket codes run from 0 to q - 1; -1 marks rows without enough data.
    buckets = frame['col1_qcut'].dropna()
    for code in buckets:
        assert -1 <= code < 5


def test_rolling_norm():
    """Smoke-test ``rolling_norm`` on 100 random values."""
    from czsc.utils.features import rolling_norm

    frame = pd.DataFrame({
        'col1': np.random.rand(100),
    })

    rolling_norm(frame, 'col1', n=10, new_col='col1_norm')

    # The function works in place, so the new column must now exist ...
    assert 'col1_norm' in frame.columns

    # ... and be aligned with the source column.
    assert len(frame['col1_norm']) == len(frame['col1'])

    # Every normalised value is expected to stay within [-3, 3].
    scores = frame['col1_norm'].dropna()
    for score in scores:
        assert -3 <= score <= 3


@pytest.mark.skipif(sys.version_info < (3, 8), reason="requires python3.8 or higher")
def test_rolling_rank():
    """Smoke-test ``rolling_rank`` on 100 random values."""
    from czsc.utils.features import rolling_rank

    frame = pd.DataFrame({
        'col1': np.random.rand(100),
    })

    rolling_rank(frame, 'col1', n=10, new_col='col1_rank')

    # The function works in place, so the new column must now exist ...
    assert 'col1_rank' in frame.columns

    # ... and be aligned with the source column.
    assert len(frame['col1_rank']) == len(frame['col1'])

    # Every rank is expected to stay within [0, 100].
    ranks = frame['col1_rank'].dropna()
    for rank in ranks:
        assert 0 <= rank <= 100

0 comments on commit c55a00b

Please sign in to comment.