diff --git a/sweetviz/graph_cat.py b/sweetviz/graph_cat.py index 9d3f27b..4f40ea8 100644 --- a/sweetviz/graph_cat.py +++ b/sweetviz/graph_cat.py @@ -83,7 +83,7 @@ def __init__(self, which_graph: str, to_process: FeatureToProcess): axs.xaxis.tick_top() elif is_detail: height = config["Graphs"].getfloat("detail_graph_height_base") \ - + config["Graphs"].getfloat("detail_graph_height_per_elem") * len(plot_data_series) + + config["Graphs"].getfloat("detail_graph_height_per_elem") * max(1, len(plot_data_series)) if height > config["Graphs"].getfloat("detail_graph_categorical_max_height"): # Shrink height to fit, past a certain number height = config["Graphs"].getfloat("detail_graph_categorical_max_height") diff --git a/sweetviz/graph_numeric.py b/sweetviz/graph_numeric.py index 37aa955..f363cf4 100644 --- a/sweetviz/graph_numeric.py +++ b/sweetviz/graph_numeric.py @@ -47,15 +47,23 @@ def __init__(self, which_graph: str, to_process: FeatureToProcess): np.seterr(all='raise') # WORKAROUND histogram warnings cleaned_source = to_process.source[~np.isnan(to_process.source)] + if len(cleaned_source): + norm_source = np.full(len(cleaned_source), 1.0 / len(cleaned_source)) + else: + norm_source = [] if to_process.compare is not None: # COMPARE cleaned_compare = to_process.compare[~np.isnan(to_process.compare)] plot_data = (cleaned_source, cleaned_compare) - normalizing_weights = (np.full(len(cleaned_source), 1.0 / len(cleaned_source)), - np.full(len(cleaned_compare), 1.0 / len(cleaned_compare))) + if len(cleaned_compare): + norm_compare = np.full(len(cleaned_compare), 1.0 / len(cleaned_compare)) + else: + norm_compare = [] + normalizing_weights = (norm_source, norm_compare) + else: plot_data = cleaned_source - normalizing_weights = np.full(len(cleaned_source), 1.0 / len(cleaned_source)) + normalizing_weights = norm_source gap_percent = config["Graphs"].getfloat("summary_graph_categorical_gap") diff --git a/sweetviz/series_analyzer.py b/sweetviz/series_analyzer.py index c674dd0..894030d 100644 --- a/sweetviz/series_analyzer.py +++ b/sweetviz/series_analyzer.py @@ -83,6 +83,7 @@ def analyze_feature_to_dictionary(to_process: FeatureToProcess) -> dict: to_process.source_counts = get_counts(to_process.source) returned_feature_dict["type"] = determine_feature_type(to_process.source, to_process.source_counts, to_process.predetermined_type, "SOURCE") + source_type = returned_feature_dict["type"] # Determine COMPARED feature type & initialize compare_dict = None @@ -91,14 +92,31 @@ def analyze_feature_to_dictionary(to_process: FeatureToProcess) -> dict: compare_type = determine_feature_type(to_process.compare, to_process.compare_counts, returned_feature_dict["type"], "COMPARED") - # Explicitly show missing categories on each set - if compare_type == FeatureType.TYPE_CAT or compare_type == FeatureType.TYPE_BOOL: - fill_out_missing_counts_in_other_series(to_process.compare_counts, to_process.source_counts) - fill_out_missing_counts_in_other_series(to_process.source_counts, to_process.compare_counts) + if compare_type != FeatureType.TYPE_ALL_NAN and \ + source_type != FeatureType.TYPE_ALL_NAN: + # Explicitly show missing categories on each set + if compare_type == FeatureType.TYPE_CAT or compare_type == FeatureType.TYPE_BOOL: + fill_out_missing_counts_in_other_series(to_process.compare_counts, to_process.source_counts) + fill_out_missing_counts_in_other_series(to_process.source_counts, to_process.compare_counts) returned_feature_dict["compare"] = dict() compare_dict = returned_feature_dict["compare"] compare_dict["type"] = compare_type + # Settle all-NaN series, depending on source versus compared + if to_process.compare is not None: + # Settle all-Nan WITH COMPARE: Must consider all cases between source and compare + if compare_type == FeatureType.TYPE_ALL_NAN and source_type == FeatureType.TYPE_ALL_NAN: + returned_feature_dict["type"] = FeatureType.TYPE_TEXT + compare_dict["type"] = FeatureType.TYPE_TEXT + elif compare_type == FeatureType.TYPE_ALL_NAN: + compare_dict["type"] = source_type + elif source_type == FeatureType.TYPE_ALL_NAN: + returned_feature_dict["type"] = compare_type + else: + # Settle all-Nan WITHOUT COMPARE ( trivial: consider as TEXT ) + if source_type == FeatureType.TYPE_ALL_NAN: + returned_feature_dict["type"] = FeatureType.TYPE_TEXT + # Establish base stats add_series_base_stats_to_dict(to_process.source, to_process.source_counts, returned_feature_dict) if to_process.compare is not None: diff --git a/sweetviz/sv_types.py b/sweetviz/sv_types.py index 94c7b23..8fd4a87 100644 --- a/sweetviz/sv_types.py +++ b/sweetviz/sv_types.py @@ -12,6 +12,7 @@ class FeatureType(Enum): TYPE_NUM = "NUM" TYPE_TEXT = "TEXT" TYPE_UNSUPPORTED = "UNSUPPORTED" + TYPE_ALL_NAN = "ALL_NAN" TYPE_UNKNOWN = "UNKNOWN" TYPE_SKIPPED = "SKIPPED" diff --git a/sweetviz/type_detection.py b/sweetviz/type_detection.py index aff46eb..34b7754 100644 --- a/sweetviz/type_detection.py +++ b/sweetviz/type_detection.py @@ -29,7 +29,8 @@ def determine_feature_type(series: pd.Series, counts: dict, # TODO: must_be_this_type ENFORCING if counts["distinct_count_without_nan"] == 0: # Empty - var_type = FeatureType.TYPE_UNSUPPORTED + var_type = FeatureType.TYPE_ALL_NAN + # var_type = FeatureType.TYPE_UNSUPPORTED elif is_boolean(series, counts): var_type = FeatureType.TYPE_BOOL elif is_numeric(series, counts): @@ -48,7 +49,9 @@ def determine_feature_type(series: pd.Series, counts: dict, # NUM -> CAT # NUM -> TEXT if must_be_this_type != FeatureType.TYPE_UNKNOWN and \ - must_be_this_type != var_type: + must_be_this_type != var_type and \ + must_be_this_type != FeatureType.TYPE_ALL_NAN and \ + var_type != FeatureType.TYPE_ALL_NAN: if var_type == FeatureType.TYPE_TEXT and must_be_this_type == FeatureType.TYPE_CAT: var_type = FeatureType.TYPE_CAT elif (var_type == FeatureType.TYPE_CAT or var_type == FeatureType.TYPE_BOOL ) and \