1
- #
2
- # This code is deprecated. It is recommended that you use
3
- # the numerai-tools package instead:
4
- # https://github.com/numerai/numerai-tools
5
- #
6
- # See the notebooks for examples.
7
- #
1
+ """
2
+ THIS MODULE IS DEPRECATED. Use numerai-tools:
3
+ https://github.com/numerai/numerai-tools
4
+
5
+ If there is a feature missing from numerai-tools, please
6
+ open an issue with a link to the function in this file you'd
7
+ like to see.
8
+ """
8
9
9
10
import numpy as np
10
11
import pandas as pd
11
12
import scipy
12
- from halo import Halo
13
13
from tqdm import tqdm
14
14
from pathlib import Path
15
15
import json
16
- from scipy .stats import skew
17
16
18
17
ERA_COL = "era"
19
18
TARGET_COL = "target_cyrus_v4_20"
20
19
DATA_TYPE_COL = "data_type"
21
20
EXAMPLE_PREDS_COL = "example_preds"
22
-
23
- spinner = Halo (text = "" , spinner = "dots" )
24
-
25
21
MODEL_FOLDER = "models"
26
22
MODEL_CONFIGS_FOLDER = "model_configs"
27
23
PREDICTION_FILES_FOLDER = "prediction_files"
28
24
29
25
30
26
def save_prediction (df , name ):
27
+ """DEPRECATED"""
31
28
try :
32
29
Path (PREDICTION_FILES_FOLDER ).mkdir (exist_ok = True , parents = True )
33
30
except Exception as ex :
@@ -36,6 +33,7 @@ def save_prediction(df, name):
36
33
37
34
38
35
def save_model (model , name ):
36
+ """DEPRECATED"""
39
37
try :
40
38
Path (MODEL_FOLDER ).mkdir (exist_ok = True , parents = True )
41
39
except Exception as ex :
@@ -44,6 +42,7 @@ def save_model(model, name):
44
42
45
43
46
44
def load_model (name ):
45
+ """DEPRECATED"""
47
46
path = Path (f"{ MODEL_FOLDER } /{ name } .pkl" )
48
47
if path .is_file ():
49
48
model = pd .read_pickle (f"{ MODEL_FOLDER } /{ name } .pkl" )
@@ -53,6 +52,7 @@ def load_model(name):
53
52
54
53
55
54
def save_model_config (model_config , model_name ):
55
+ """DEPRECATED"""
56
56
try :
57
57
Path (MODEL_CONFIGS_FOLDER ).mkdir (exist_ok = True , parents = True )
58
58
except Exception as ex :
@@ -62,6 +62,7 @@ def save_model_config(model_config, model_name):
62
62
63
63
64
64
def load_model_config (model_name ):
65
+ """DEPRECATED"""
65
66
path_str = f"{ MODEL_CONFIGS_FOLDER } /{ model_name } .json"
66
67
path = Path (path_str )
67
68
if path .is_file ():
@@ -73,6 +74,7 @@ def load_model_config(model_name):
73
74
74
75
75
76
def get_biggest_change_features (corrs , n ):
77
+ """DEPRECATED"""
76
78
all_eras = corrs .index .sort_values ()
77
79
h1_eras = all_eras [: len (all_eras ) // 2 ]
78
80
h2_eras = all_eras [len (all_eras ) // 2 :]
@@ -86,6 +88,7 @@ def get_biggest_change_features(corrs, n):
86
88
87
89
88
90
def get_time_series_cross_val_splits (data , cv = 3 , embargo = 12 ):
91
+ """DEPRECATED"""
89
92
all_train_eras = data [ERA_COL ].unique ()
90
93
len_split = len (all_train_eras ) // cv
91
94
test_splits = [
@@ -131,6 +134,7 @@ def neutralize(
131
134
era_col = "era" ,
132
135
verbose = False ,
133
136
):
137
+ """DEPRECATED"""
134
138
if neutralizers is None :
135
139
neutralizers = []
136
140
unique_eras = df [era_col ].unique ()
@@ -165,6 +169,7 @@ def neutralize(
165
169
166
170
167
171
def neutralize_series (series , by , proportion = 1.0 ):
172
+ """DEPRECATED"""
168
173
scores = series .values .reshape (- 1 , 1 )
169
174
exposures = by .values .reshape (- 1 , 1 )
170
175
@@ -182,11 +187,13 @@ def neutralize_series(series, by, proportion=1.0):
182
187
183
188
184
189
def unif(df):
    """DEPRECATED

    Rank the values of ``df`` and rescale the ranks by its length.

    Ranks are computed with ``method="first"``, shifted down by 0.5 and
    divided by ``len(df)``. The result is wrapped in a ``pd.Series``
    carrying the same index as the input.

    NOTE(review): the ``pd.Series(...)`` wrapper suggests ``df`` is expected
    to be a Series rather than a DataFrame - confirm against callers.
    """
    ranks = df.rank(method="first")
    n_rows = len(df)
    # Centre each rank inside its 1/n-wide bucket before scaling.
    scaled = (ranks - 0.5) / n_rows
    return pd.Series(scaled, index=df.index)
187
193
188
194
189
195
def numerai_corr (preds , target ):
196
+ """DEPRECATED"""
190
197
# rank (keeping ties) then gaussianize predictions to standardize prediction distributions
191
198
ranked_preds = (preds .rank (method = "average" ).values - 0.5 ) / preds .count ()
192
199
gauss_ranked_preds = scipy .stats .norm .ppf (ranked_preds )
@@ -202,6 +209,7 @@ def numerai_corr(preds, target):
202
209
def get_feature_neutral_mean (
203
210
df , prediction_col , target_col , features_for_neutralization = None
204
211
):
212
+ """DEPRECATED"""
205
213
if features_for_neutralization is None :
206
214
features_for_neutralization = [c for c in df .columns if c .startswith ("feature" )]
207
215
df .loc [:, "neutral_sub" ] = neutralize (
@@ -218,6 +226,7 @@ def get_feature_neutral_mean(
218
226
def get_feature_neutral_mean_tb_era (
219
227
df , prediction_col , target_col , tb , features_for_neutralization = None
220
228
):
229
+ """DEPRECATED"""
221
230
if features_for_neutralization is None :
222
231
features_for_neutralization = [c for c in df .columns if c .startswith ("feature" )]
223
232
temp_df = df .reset_index (
@@ -234,6 +243,7 @@ def get_feature_neutral_mean_tb_era(
234
243
235
244
236
245
def fast_score_by_date (df , columns , target , tb = None , era_col = "era" ):
246
+ """DEPRECATED"""
237
247
unique_eras = df [era_col ].unique ()
238
248
computed = []
239
249
for u in unique_eras :
@@ -258,6 +268,7 @@ def fast_score_by_date(df, columns, target, tb=None, era_col="era"):
258
268
259
269
260
270
def exposure_dissimilarity_per_era (df , prediction_col , example_col , feature_cols = None ):
271
+ """DEPRECATED"""
261
272
if feature_cols is None :
262
273
feature_cols = [c for c in df .columns if c .startswith ("feature" )]
263
274
u = df .loc [:, feature_cols ].corrwith (df [prediction_col ])
@@ -273,6 +284,7 @@ def validation_metrics(
273
284
target_col = TARGET_COL ,
274
285
features_for_neutralization = None ,
275
286
):
287
+ """DEPRECATED"""
276
288
validation_stats = pd .DataFrame ()
277
289
feature_cols = [c for c in validation_data if c .startswith ("feature_" )]
278
290
for pred_col in pred_cols :
0 commit comments