Ignore NaN in calculations: it caused crashes (and was not used anyway)

XrosLiang · Jul 17, 2020 · a6af83b · a6af83b
1 parent 4862f28
commit a6af83b
Show file tree

Hide file tree

Showing 2 changed files with 16 additions and 4 deletions.
diff --git a/sweetviz/dataframe_report.py b/sweetviz/dataframe_report.py
@@ -2,6 +2,7 @@
 import os
 import time
 import pandas as pd
+from numpy import isnan
 from tqdm import tqdm
 
 from sweetviz.sv_types import NumWithPercent, FeatureToProcess, FeatureType
@@ -419,11 +420,17 @@ def mirror_association(association_dict, feature_name, other_name, value):
  # NUM-NUM
  cur_associations[other.source.name] = \
  feature.source.corr(other.source, method='pearson')
+ # TODO: display correlation error better in graph!
+ if isnan(cur_associations[other.source.name]):
+ cur_associations[other.source.name] = 0.0
  mirror_association(self._associations, feature_name, other.source.name, \
  cur_associations[other.source.name])
  if process_compare:
  cur_associations_compare[other.source.name] = \
  feature.compare.corr(other.compare, method='pearson')
+ # TODO: display correlation error better in graph!
+ if isnan(cur_associations_compare[other.source.name]):
+ cur_associations_compare[other.source.name] = 0.0
  mirror_association(self._associations_compare, feature_name, other.source.name, \
  cur_associations_compare[other.source.name])
  self.progress_bar.update(1)
diff --git a/sweetviz/series_analyzer.py b/sweetviz/series_analyzer.py
@@ -12,25 +12,30 @@ def get_counts(series: pd.Series) -> dict:
  value_counts_without_nan = (
  value_counts_with_nan.reset_index().dropna().set_index("index").iloc[:, 0]
  )
- distinct_count_with_nan = value_counts_with_nan.count()
+ # IGNORING NAN FOR NOW AS IT CAUSES ISSUES [FIX]
+ # distinct_count_with_nan = value_counts_with_nan.count()
  distinct_count_without_nan = value_counts_without_nan.count()
 
  # Convert indices to strings (helps with referencing later)
  # value_counts_without_nan.index = value_counts_without_nan.index.map(str)
 
  return {
  # "value_counts": value_counts_without_nan, # Alias
- "value_counts_with_nan": value_counts_with_nan,
+ # IGNORING NAN FOR NOW AS IT CAUSES ISSUES [FIX]
+ # "value_counts_with_nan": value_counts_with_nan,
  "value_counts_without_nan": value_counts_without_nan,
- "distinct_count_with_nan": distinct_count_with_nan,
+ # IGNORING NAN FOR NOW AS IT CAUSES ISSUES [FIX]
+ # "distinct_count_with_nan": distinct_count_with_nan,
  "distinct_count_without_nan": distinct_count_without_nan,
  "num_rows_with_data": series.count(),
  "num_rows_total": len(series),
  }
 
 
 def fill_out_missing_counts_in_other_series(my_counts:dict, other_counts:dict):
- to_fill_list = ["value_counts_with_nan", "value_counts_without_nan"]
+ # IGNORING NAN FOR NOW AS IT CAUSES ISSUES [FIX]
+ # to_fill_list = ["value_counts_with_nan", "value_counts_without_nan"]
+ to_fill_list = ["value_counts_without_nan"]
  for to_fill in to_fill_list:
  for key, value in other_counts[to_fill].items():
  if key not in my_counts[to_fill]: