Skip to content

Commit

Permalink
Fix TopkDropoutStrategy && dump_bin
Browse files Browse the repository at this point in the history
  • Loading branch information
zhupr committed Nov 25, 2020
1 parent 88b6fc4 commit c14a99a
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 46 deletions.
168 changes: 127 additions & 41 deletions qlib/contrib/strategy/strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def get_risk_degree(self, date):

def generate_order_list(self, score_series, current, trade_exchange, pred_date, trade_date):
"""
Parameters:
Parameters
-----------
score_series : pd.Series
stock_id , score
Expand All @@ -46,7 +46,7 @@ def generate_order_list(self, score_series, current, trade_exchange, pred_date,

def update(self, score_series, pred_date, trade_date):
"""User can use this method to update strategy state each trade date.
Parameters:
Parameters
-----------
score_series : pd.Series
stock_id , score
Expand Down Expand Up @@ -140,20 +140,23 @@ def __init__(self, order_generator_cls_or_obj=OrderGenWInteract, *args, **kwargs

def generate_target_weight_position(self, score, current, trade_date):
"""
Parameters:
Parameters
-----------
score : pred score for this trade date, pd.Series, index is stock_id, contain 'score' column
current : current position, use Position() class
trade_exchange : Exchange()
trade_date : trade date
score : pd.Series
pred score for this trade date, index is stock_id, contain 'score' column
current : Position
current position, use Position() class
trade_exchange : Exchange
trade_date : str, pd.Timestamp
trade date
generate target position from score for this date and the current position
The cash is not considered in the position
"""
raise NotImplementedError()

def generate_order_list(self, score_series, current, trade_exchange, pred_date, trade_date):
"""
Parameters:
Parameters
----------
score_series : pd.Series
stock_id , score
Expand Down Expand Up @@ -186,36 +189,57 @@ def generate_order_list(self, score_series, current, trade_exchange, pred_date,


class TopkDropoutStrategy(BaseStrategy, ListAdjustTimer):
def __init__(self, topk, n_drop, method="bottom", risk_degree=0.95, thresh=1, hold_thresh=1, **kwargs):
def __init__(
self,
topk,
n_drop,
method_sell="bottom",
method_buy="top",
risk_degree=0.95,
thresh=1,
hold_thresh=1,
only_tradable=False,
**kwargs,
):
"""
Parameters:
-----------
Parameters
----------
topk : int
The number of stocks in the portfolio
n_drop : int
number of stocks to be replaced in each trading date
method : str
dropout method, random/bottom
method_sell : str
dropout method_sell, random/bottom
method_buy : str
dropout method_buy, random/top
risk_degree : float
position percentage of total value
thresh : int
minimum holding days since last buy signal of the stock
hold_thresh : int
minimum holding days
before sell stock , will check current.get_stock_count(order.stock_id) >= self.thresh
only_tradable : bool
will the strategy only consider the tradable stock when buying and selling.
if only_tradable:
strategy will use the tradable state of each stock when making decisions and avoid picking untradable stocks to buy or sell
else:
strategy will make buy sell decision without checking the tradable state of the stock
"""
super(TopkDropoutStrategy, self).__init__()
ListAdjustTimer.__init__(self, kwargs.get("adjust_dates", None))
self.topk = topk
self.n_drop = n_drop
self.method = method
self.method_sell = method_sell
self.method_buy = method_buy
self.risk_degree = risk_degree
self.thresh = thresh
# self.stock_count['code'] will be the days the stock has been hold
# since last buy signal. This is designed for thresh
self.stock_count = {}

self.hold_thresh = hold_thresh
self.only_tradable = only_tradable

def get_risk_degree(self, date):
"""get_risk_degree
Expand All @@ -226,42 +250,102 @@ def get_risk_degree(self, date):
return self.risk_degree

def generate_order_list(self, score_series, current, trade_exchange, pred_date, trade_date):
"""
Gnererate order list according to score_series at trade_date, will not change current.
Parameters:
-----------
score_series : pd.Series
stock_id , score
current : Position()
current of account
trade_exchange : Exchange()
exchange
pred_date : pd.Timestamp
predict date
trade_date : pd.Timestamp
trade date
"""Gnererate order list according to score_series at trade_date.
will not change current.
Parameters
----------
score_series : pd.Series
stock_id , score
current : Position()
current of account
trade_exchange : Exchange()
exchange
pred_date : pd.Timestamp
predict date
trade_date : pd.Timestamp
trade date
"""
if not self.is_adjust(trade_date):
return []

if self.only_tradable:
# If the strategy only considers tradable stocks when making decisions,
# it needs the following helpers to filter stocks
def get_first_n(l, n, reverse=False):
    """Return up to ``n`` tradable stocks from ``l``, keeping the order of ``l``.

    Parameters
    ----------
    l : iterable of stock ids
        Candidate stocks; must support ``reversed`` when ``reverse`` is True.
    n : int
        Maximum number of stocks to return. ``n <= 0`` yields an empty list.
    reverse : bool
        If True, scan ``l`` from its end, so the last ``n`` tradable stocks
        are returned (still in their original relative order).

    Returns
    -------
    list
        The selected stock ids.
    """
    # Guard up front: the count used to be checked only after appending,
    # so a request for zero (or fewer) stocks still returned one stock.
    if n <= 0:
        return []
    res = []
    for si in reversed(l) if reverse else l:
        if trade_exchange.is_stock_tradable(stock_id=si, trade_date=trade_date):
            res.append(si)
            if len(res) >= n:
                break
    # When scanning backwards, restore the original ordering of ``l``.
    return res[::-1] if reverse else res

def get_last_n(l, n):
    """Return up to ``n`` tradable stocks taken from the end of ``l``.

    Delegates to ``get_first_n`` with ``reverse=True``.
    """
    tail = get_first_n(l, n, reverse=True)
    return tail

def filter_stock(l):
    """Return the stocks of ``l`` that the exchange reports as tradable on ``trade_date``."""
    tradable = []
    for si in l:
        if trade_exchange.is_stock_tradable(stock_id=si, trade_date=trade_date):
            tradable.append(si)
    return tradable

else:
# Otherwise, the strategy will make decisions without the stock tradable info
def get_first_n(l, n):
    """Return the first ``n`` items of ``l`` as a list (no tradable check).

    Parameters
    ----------
    l : iterable
        Candidate stock ids.
    n : int
        Maximum number of items to return. ``n <= 0`` yields an empty list.

    Returns
    -------
    list
    """
    # Clamp to zero: a negative ``n`` would otherwise slice as "all but the
    # last |n| items" instead of returning nothing.
    return list(l)[: max(n, 0)]

def get_last_n(l, n):
    """Return the last ``n`` items of ``l`` as a list (no tradable check).

    Parameters
    ----------
    l : iterable
        Candidate stock ids.
    n : int
        Maximum number of items to return. ``n <= 0`` yields an empty list.

    Returns
    -------
    list
    """
    items = list(l)
    # ``items[-0:]`` is the whole list, so zero must be handled explicitly;
    # otherwise asking for "no stocks" would return every stock.
    return items[-n:] if n > 0 else []

def filter_stock(l):
    """Return ``l`` unchanged; no tradable check is applied in this mode."""
    unfiltered = l
    return unfiltered

current_temp = copy.deepcopy(current)
# generate order list for this adjust date
sell_order_list = []
buy_order_list = []
# load score
cash = current_temp.get_cash()
current_stock_list = current_temp.get_stock_list()
# last position (sorted by score)
last = score_series.reindex(current_stock_list).sort_values(ascending=False).index
today = (
score_series[~score_series.index.isin(last)]
.sort_values(ascending=False)
.index[: self.n_drop + self.topk - len(last)]
)
comb = score_series.reindex(last.union(today)).sort_values(ascending=False).index
if self.method == "bottom":
sell = last[last.isin(comb[-self.n_drop :])]
elif self.method == "random":
sell = pd.Index(np.random.choice(last, self.n_drop) if len(last) else [])
# The new stocks we want to buy today (**at most** this many)
if self.method_buy == "top":
today = get_first_n(
score_series[~score_series.index.isin(last)].sort_values(ascending=False).index,
self.n_drop + self.topk - len(last),
)
elif self.method_buy == "random":
topk_candi = get_first_n(score_series.sort_values(ascending=False).index, self.topk)
candi = list(filter(lambda x: x not in last, topk_candi))
n = self.n_drop + self.topk - len(last)
try:
today = np.random.choice(candi, n, replace=False)
except ValueError:
today = candi
else:
raise NotImplementedError(f"This type of input is not supported")
# combine(new stocks + last stocks), we will drop stocks from this list
# In case of dropping higher score stock and buying lower score stock.
comb = score_series.reindex(last.union(pd.Index(today))).sort_values(ascending=False).index

# Get the stock list we really want to sell (After filtering the case that we sell high and buy low)
if self.method_sell == "bottom":
sell = last[last.isin(get_last_n(comb, self.n_drop))]
elif self.method_sell == "random":
candi = filter_stock(last)
try:
sell = pd.Index(np.random.choice(candi, self.n_drop, replace=False) if len(last) else [])
except ValueError: # No enough candidates
sell = candi
else:
raise NotImplementedError(f"This type of input is not supported")

# Get the stock list we really want to buy
buy = today[: len(sell) + self.topk - len(last)]

# buy signal: if a stock falls into topk, it appears in buy_signal
buy_signal = score_series.sort_values(ascending=False).iloc[: self.topk].index

for code in current_stock_list:
if not trade_exchange.is_stock_tradable(stock_id=code, trade_date=trade_date):
continue
Expand All @@ -285,12 +369,14 @@ def generate_order_list(self, score_series, current, trade_exchange, pred_date,
if trade_exchange.check_order(sell_order):
sell_order_list.append(sell_order)
trade_val, trade_cost, trade_price = trade_exchange.deal_order(sell_order, position=current_temp)
# update cash
cash += trade_val - trade_cost
# sold
del self.stock_count[code]
else:
# no buy signal, but the stock is kept
self.stock_count[code] += 1
elif code in buy:
elif code in buy_signal:
# NOTE: This is different from the original version
# get new buy signal
# Only the stock fall in to topk will produce buy signal
Expand All @@ -300,7 +386,7 @@ def generate_order_list(self, score_series, current, trade_exchange, pred_date,
# buy new stock
# note the current has been changed
current_stock_list = current_temp.get_stock_list()
value = current_temp.get_cash() * self.risk_degree / len(buy) if len(buy) > 0 else 0
value = cash * self.risk_degree / len(buy) if len(buy) > 0 else 0

# open_cost should be considered in the real trading environment, while the backtest in evaluate.py does not consider it
# as the aim of demo is to accomplish same strategy as evaluate.py, so comment out this line
Expand Down
4 changes: 4 additions & 0 deletions scripts/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ python get_data.py qlib_data --help

### US data

> Need to download data first: [Downlaod US Data](#Downlaod-US-Data)
```python
import qlib
from qlib.config import REG_US
Expand All @@ -52,6 +54,8 @@ qlib.init(provider_uri=provider_uri, region=REG_US)

### CN data

> Need to download data first: [Download CN Data](#Download-CN-Data)
```python
import qlib
from qlib.config import REG_CN
Expand Down
10 changes: 5 additions & 5 deletions scripts/dump_bin.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def _get_date(

def _get_source_data(self, file_path: Path) -> pd.DataFrame:
df = pd.read_csv(str(file_path.resolve()), low_memory=False)
df[self.date_field_name] = df[self.date_field_name].astype(np.datetime64)
df[self.date_field_name] = df[self.date_field_name].astype(str).astype(np.datetime64)
# df.drop_duplicates([self.date_field_name], inplace=True)
return df

Expand Down Expand Up @@ -339,10 +339,10 @@ def _dump_instruments(self):
def dump(self):
self._calendars_list = self._read_calendars(self._calendars_dir.joinpath(f"{self.freq}.txt"))
# noinspection PyAttributeOutsideInit
self._old_instruments = self._read_instruments(
self._instruments_dir.joinpath(self.INSTRUMENTS_FILE_NAME)
).to_dict(
orient="index"
self._old_instruments = (
self._read_instruments(self._instruments_dir.joinpath(self.INSTRUMENTS_FILE_NAME))
.set_index([self.symbol_field_name])
.to_dict(orient="index")
) # type: dict
self._dump_instruments()
self._dump_features()
Expand Down

0 comments on commit c14a99a

Please sign in to comment.