Skip to content

Commit

Permalink
Ignore NaN in calculations: it caused crashes (and was not used anyway!)
Browse files Browse the repository at this point in the history
  • Loading branch information
fbdesignpro committed Jul 17, 2020
1 parent 4862f28 commit a6af83b
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 4 deletions.
7 changes: 7 additions & 0 deletions sweetviz/dataframe_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import time
import pandas as pd
from numpy import isnan
from tqdm import tqdm

from sweetviz.sv_types import NumWithPercent, FeatureToProcess, FeatureType
Expand Down Expand Up @@ -419,11 +420,17 @@ def mirror_association(association_dict, feature_name, other_name, value):
# NUM-NUM
cur_associations[other.source.name] = \
feature.source.corr(other.source, method='pearson')
# TODO: display correlation error better in graph!
if isnan(cur_associations[other.source.name]):
cur_associations[other.source.name] = 0.0
mirror_association(self._associations, feature_name, other.source.name, \
cur_associations[other.source.name])
if process_compare:
cur_associations_compare[other.source.name] = \
feature.compare.corr(other.compare, method='pearson')
# TODO: display correlation error better in graph!
if isnan(cur_associations_compare[other.source.name]):
cur_associations_compare[other.source.name] = 0.0
mirror_association(self._associations_compare, feature_name, other.source.name, \
cur_associations_compare[other.source.name])
self.progress_bar.update(1)
13 changes: 9 additions & 4 deletions sweetviz/series_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,30 @@ def get_counts(series: pd.Series) -> dict:
value_counts_without_nan = (
value_counts_with_nan.reset_index().dropna().set_index("index").iloc[:, 0]
)
distinct_count_with_nan = value_counts_with_nan.count()
# IGNORING NAN FOR NOW AS IT CAUSES ISSUES [FIX]
# distinct_count_with_nan = value_counts_with_nan.count()
distinct_count_without_nan = value_counts_without_nan.count()

# Convert indices to strings (helps with referencing later)
# value_counts_without_nan.index = value_counts_without_nan.index.map(str)

return {
# "value_counts": value_counts_without_nan, # Alias
"value_counts_with_nan": value_counts_with_nan,
# IGNORING NAN FOR NOW AS IT CAUSES ISSUES [FIX]
# "value_counts_with_nan": value_counts_with_nan,
"value_counts_without_nan": value_counts_without_nan,
"distinct_count_with_nan": distinct_count_with_nan,
# IGNORING NAN FOR NOW AS IT CAUSES ISSUES [FIX]
# "distinct_count_with_nan": distinct_count_with_nan,
"distinct_count_without_nan": distinct_count_without_nan,
"num_rows_with_data": series.count(),
"num_rows_total": len(series),
}


def fill_out_missing_counts_in_other_series(my_counts:dict, other_counts:dict):
to_fill_list = ["value_counts_with_nan", "value_counts_without_nan"]
# IGNORING NAN FOR NOW AS IT CAUSES ISSUES [FIX]
# to_fill_list = ["value_counts_with_nan", "value_counts_without_nan"]
to_fill_list = ["value_counts_without_nan"]
for to_fill in to_fill_list:
for key, value in other_counts[to_fill].items():
if key not in my_counts[to_fill]:
Expand Down

0 comments on commit a6af83b

Please sign in to comment.