Skip to content

Commit 39dedfc

Browse files
authored
Merge pull request #171 from numerai/ndharasz/deprecate-utils
Deprecate utils.py
2 parents 96cf527 + 44f2cea commit 39dedfc

File tree

1 file changed

+24
-12
lines changed

1 file changed

+24
-12
lines changed

utils.py

Lines changed: 24 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,33 +1,30 @@
1-
#
2-
# This code is deprecated. It is recommended that you use
3-
# the numerai-tools package instead:
4-
# https://github.com/numerai/numerai-tools
5-
#
6-
# See the notebooks for examples.
7-
#
1+
"""
2+
THIS MODULE IS DEPRECATED. Use numerai-tools:
3+
https://github.com/numerai/numerai-tools
4+
5+
If there is a feature missing from numerai-tools, please
6+
open an issue with a link to the function in this file you'd
7+
like to see.
8+
"""
89

910
import numpy as np
1011
import pandas as pd
1112
import scipy
12-
from halo import Halo
1313
from tqdm import tqdm
1414
from pathlib import Path
1515
import json
16-
from scipy.stats import skew
1716

1817
ERA_COL = "era"
1918
TARGET_COL = "target_cyrus_v4_20"
2019
DATA_TYPE_COL = "data_type"
2120
EXAMPLE_PREDS_COL = "example_preds"
22-
23-
spinner = Halo(text="", spinner="dots")
24-
2521
MODEL_FOLDER = "models"
2622
MODEL_CONFIGS_FOLDER = "model_configs"
2723
PREDICTION_FILES_FOLDER = "prediction_files"
2824

2925

3026
def save_prediction(df, name):
27+
"""DEPRECATED"""
3128
try:
3229
Path(PREDICTION_FILES_FOLDER).mkdir(exist_ok=True, parents=True)
3330
except Exception as ex:
@@ -36,6 +33,7 @@ def save_prediction(df, name):
3633

3734

3835
def save_model(model, name):
36+
"""DEPRECATED"""
3937
try:
4038
Path(MODEL_FOLDER).mkdir(exist_ok=True, parents=True)
4139
except Exception as ex:
@@ -44,6 +42,7 @@ def save_model(model, name):
4442

4543

4644
def load_model(name):
45+
"""DEPRECATED"""
4746
path = Path(f"{MODEL_FOLDER}/{name}.pkl")
4847
if path.is_file():
4948
model = pd.read_pickle(f"{MODEL_FOLDER}/{name}.pkl")
@@ -53,6 +52,7 @@ def load_model(name):
5352

5453

5554
def save_model_config(model_config, model_name):
55+
"""DEPRECATED"""
5656
try:
5757
Path(MODEL_CONFIGS_FOLDER).mkdir(exist_ok=True, parents=True)
5858
except Exception as ex:
@@ -62,6 +62,7 @@ def save_model_config(model_config, model_name):
6262

6363

6464
def load_model_config(model_name):
65+
"""DEPRECATED"""
6566
path_str = f"{MODEL_CONFIGS_FOLDER}/{model_name}.json"
6667
path = Path(path_str)
6768
if path.is_file():
@@ -73,6 +74,7 @@ def load_model_config(model_name):
7374

7475

7576
def get_biggest_change_features(corrs, n):
77+
"""DEPRECATED"""
7678
all_eras = corrs.index.sort_values()
7779
h1_eras = all_eras[: len(all_eras) // 2]
7880
h2_eras = all_eras[len(all_eras) // 2 :]
@@ -86,6 +88,7 @@ def get_biggest_change_features(corrs, n):
8688

8789

8890
def get_time_series_cross_val_splits(data, cv=3, embargo=12):
91+
"""DEPRECATED"""
8992
all_train_eras = data[ERA_COL].unique()
9093
len_split = len(all_train_eras) // cv
9194
test_splits = [
@@ -131,6 +134,7 @@ def neutralize(
131134
era_col="era",
132135
verbose=False,
133136
):
137+
"""DEPRECATED"""
134138
if neutralizers is None:
135139
neutralizers = []
136140
unique_eras = df[era_col].unique()
@@ -165,6 +169,7 @@ def neutralize(
165169

166170

167171
def neutralize_series(series, by, proportion=1.0):
172+
"""DEPRECATED"""
168173
scores = series.values.reshape(-1, 1)
169174
exposures = by.values.reshape(-1, 1)
170175

@@ -182,11 +187,13 @@ def neutralize_series(series, by, proportion=1.0):
182187

183188

184189
def unif(df):
190+
"""DEPRECATED"""
185191
x = (df.rank(method="first") - 0.5) / len(df)
186192
return pd.Series(x, index=df.index)
187193

188194

189195
def numerai_corr(preds, target):
196+
"""DEPRECATED"""
190197
# rank (keeping ties) then gaussianize predictions to standardize prediction distributions
191198
ranked_preds = (preds.rank(method="average").values - 0.5) / preds.count()
192199
gauss_ranked_preds = scipy.stats.norm.ppf(ranked_preds)
@@ -202,6 +209,7 @@ def numerai_corr(preds, target):
202209
def get_feature_neutral_mean(
203210
df, prediction_col, target_col, features_for_neutralization=None
204211
):
212+
"""DEPRECATED"""
205213
if features_for_neutralization is None:
206214
features_for_neutralization = [c for c in df.columns if c.startswith("feature")]
207215
df.loc[:, "neutral_sub"] = neutralize(
@@ -218,6 +226,7 @@ def get_feature_neutral_mean(
218226
def get_feature_neutral_mean_tb_era(
219227
df, prediction_col, target_col, tb, features_for_neutralization=None
220228
):
229+
"""DEPRECATED"""
221230
if features_for_neutralization is None:
222231
features_for_neutralization = [c for c in df.columns if c.startswith("feature")]
223232
temp_df = df.reset_index(
@@ -234,6 +243,7 @@ def get_feature_neutral_mean_tb_era(
234243

235244

236245
def fast_score_by_date(df, columns, target, tb=None, era_col="era"):
246+
"""DEPRECATED"""
237247
unique_eras = df[era_col].unique()
238248
computed = []
239249
for u in unique_eras:
@@ -258,6 +268,7 @@ def fast_score_by_date(df, columns, target, tb=None, era_col="era"):
258268

259269

260270
def exposure_dissimilarity_per_era(df, prediction_col, example_col, feature_cols=None):
271+
"""DEPRECATED"""
261272
if feature_cols is None:
262273
feature_cols = [c for c in df.columns if c.startswith("feature")]
263274
u = df.loc[:, feature_cols].corrwith(df[prediction_col])
@@ -273,6 +284,7 @@ def validation_metrics(
273284
target_col=TARGET_COL,
274285
features_for_neutralization=None,
275286
):
287+
"""DEPRECATED"""
276288
validation_stats = pd.DataFrame()
277289
feature_cols = [c for c in validation_data if c.startswith("feature_")]
278290
for pred_col in pred_cols:

0 commit comments

Comments
 (0)