From 96dd7d16025db36f9a65149c49c37504cc8f3c7f Mon Sep 17 00:00:00 2001
From: Simon Brugman
Date: Thu, 7 Jul 2022 15:33:15 +0200
Subject: [PATCH] feat: entropy profile

---
 popmon/analysis/profiling/hist_profiler.py |  4 +---
 popmon/analysis/profiling/profiles.py      | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/popmon/analysis/profiling/hist_profiler.py b/popmon/analysis/profiling/hist_profiler.py
index 9db85b43..53d51714 100644
--- a/popmon/analysis/profiling/hist_profiler.py
+++ b/popmon/analysis/profiling/hist_profiler.py
@@ -102,6 +102,7 @@ def _profile_1d_histogram(self, name, hist):
 
         # difference between htype=None and htype="all" are arguments (bin labels vs hist)
         profile.update(Profiles.run([hist], dim=1, htype=None))
+        profile.update(Profiles.run([hist], dim=-1, htype=None))
 
         # postprocessing TS
         if is_ts:
@@ -110,9 +111,6 @@ def _profile_1d_histogram(self, name, hist):
                 for k, v in profile.items()
             }
 
-        # postprocessing sum
-        profile["count"] = profile["filled"] + profile["nan"]
-
         return profile
 
     def _profile_nd_histogram(self, name, hist, dim):
diff --git a/popmon/analysis/profiling/profiles.py b/popmon/analysis/profiling/profiles.py
index bc4d48da..fa660f52 100644
--- a/popmon/analysis/profiling/profiles.py
+++ b/popmon/analysis/profiling/profiles.py
@@ -130,6 +130,31 @@ def profile_count(hist):
     return int(sum_entries(hist))
 
 
+@Profiles.register(
+    key="entropy",
+    description="Entropy in nats",
+    dim=-1,
+    htype=None,
+)
+def profile_entropy(hist):
+    """Return the Shannon entropy, in nats, of the histogram's bin distribution.
+
+    The bin entries are normalised to probabilities; bins without positive
+    mass contribute nothing to the sum (p * log(p) tends to 0 as p -> 0).
+
+    :param hist: histogram object exposing ``bin_entries()``
+    :return: the entropy in nats; NaN when the bin entries do not sum to a
+        positive total (e.g. an empty histogram), where it is undefined
+    """
+    counts = np.asarray(hist.bin_entries(), dtype=float)
+    total = counts.sum()
+    if not total > 0:
+        # Nothing to normalise by: avoid 0/0 and report "undefined" explicitly.
+        return np.nan
+    probs = counts[counts > 0] / total
+    return -(probs * np.log(probs)).sum()
+
+
 @Profiles.register(
     key="filled",
     description="Number of non-missing entries (non-NaN)",