Pandas_profiling still gives index error even I reduce the dataframe size #560

bi2017dg · 2020-08-28T20:54:28Z

IndexError Traceback (most recent call last)
in
19 #ProfileReport(df_s[:10000])
20 profile = df[:1000].profile_report(title='LATE FEE & SUSPENSION Profiling Report', html={'style':{'full_width':True}})
---> 21 profile.to_file("output.html")
22 #(title='LATE FEE & SUSPENSION Profiling Report', html={'style':{'full_width':True}})
23 #profile.to_file(output_file="data profile.html")

~\Anaconda3\lib\site-packages\pandas_profiling\profile_report.py in to_file(self, output_file, silent)
243 silent: if False, opens the file in the default browser or download it in a Google Colab environment
244 """
--> 245 if not isinstance(output_file, Path):
246 output_file = Path(str(output_file))
247

~\Anaconda3\lib\site-packages\pandas_profiling\profile_report.py in to_html(self)
346 with tqdm(total=1, desc="Render JSON", disable=disable_progress_bar) as pbar:
347 data = json.dumps(description, indent=4, cls=CustomEncoder)
--> 348 pbar.update()
349 return data
350

~\Anaconda3\lib\site-packages\pandas_profiling\profile_report.py in html(self)
166 if self._df_hash == -1 and self.df is not None:
167 self._df_hash = hash_dataframe(self.df)
--> 168 return self._df_hash
169
170 @property

~\Anaconda3\lib\site-packages\pandas_profiling\profile_report.py in _render_html(self)
273 if not silent:
274 try:
--> 275 from google.colab import files
276
277 files.download(output_file.absolute().as_uri())

~\Anaconda3\lib\site-packages\pandas_profiling\profile_report.py in report(self)
160 self._title = config["title"].get(str)
161
--> 162 return self._title
163
164 @property

~\Anaconda3\lib\site-packages\pandas_profiling\profile_report.py in description_set(self)
141 self._report = None
142 self._html = None
--> 143 self._widgets = None
144 self._json = None
145

~\Anaconda3\lib\site-packages\pandas_profiling\model\describe.py in describe(title, df)
61 number_of_tasks = 9 + len(df.columns) + len(correlation_names)
62
---> 63 with tqdm(
64 total=number_of_tasks, desc="Summarize dataset", disable=disable_progress_bar
65 ) as pbar:

~\Anaconda3\lib\site-packages\pandas_profiling\model\summary.py in get_series_descriptions(df, pbar)
471 def get_series_description(series):
472 return describe_1d(series)
--> 473
474
475 def get_series_descriptions(df, pbar):

~\Anaconda3\lib\multiprocessing\pool.py in next(self, timeout)
746 if success:
747 return value
--> 748 raise value
749
750 __next__ = next # XXX

~\Anaconda3\lib\multiprocessing\pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
119 job, i, func, args, kwds = task
120 try:
--> 121 result = (True, func(*args, **kwds))
122 except Exception as e:
123 if wrap_exception and func is not _helper_reraises_exception:

~\Anaconda3\lib\site-packages\pandas_profiling\model\summary.py in multiprocess_1d(args)
448 Variable.TYPE_URL: describe_url_1d,
449 Variable.TYPE_PATH: describe_path_1d,
--> 450 Variable.TYPE_IMAGE: describe_image_1d,
451 Variable.TYPE_FILE: describe_file_1d,
452 }

~\Anaconda3\lib\site-packages\pandas_profiling\model\summary.py in describe_1d(series)
417 series: The Series to describe.
418 series_description: The dict containing the series description so far.
--> 419
420 Returns:
421 A dict containing calculated series description values.

~\Anaconda3\lib\site-packages\pandas_profiling\model\summary.py in describe_date_1d(series, series_description)
231
232 stats["monotonic_increase"] = series.is_monotonic_increasing
--> 233 stats["monotonic_decrease"] = series.is_monotonic_decreasing
234
235 stats["monotonic_increase_strict"] = (

<__array_function__ internals> in histogram(*args, **kwargs)

~\Anaconda3\lib\site-packages\numpy\lib\histograms.py in histogram(a, bins, range, normed, weights, density)
857 # The index computation is not guaranteed to give exactly
858 # consistent results within ~1 ULP of the bin edges.
--> 859 decrement = tmp_a < bin_edges[indices]
860 indices[decrement] -= 1
861 # The last bin includes the right edge. The other bins do not.

IndexError: index -9223372036854775808 is out of bounds for axis 0 with size 2

sbrugman · 2020-09-02T15:17:02Z

Could you provide the minimal information needed to reproduce this error? This guide can help you craft a minimal bug report.

the minimal code you are using to generate the report
which environment you are using:
- operating system (e.g. Windows, Linux, Mac)
- Python version (e.g. 3.7)
- jupyter notebook, console or IDE such as PyCharm
- Package manager (e.g. pip or conda; for conda, include the output of `conda info`)
- Packages (`pip freeze > packages.txt` or `conda list`)
a sample or description of the dataset (df.head(), df.info())

mike11339 · 2020-09-23T16:04:46Z

I got the same issue as shown below. How did you solve it?

IndexError Traceback (most recent call last)
C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
343 method = get_real_method(obj, self.print_method)
344 if method is not None:
--> 345 return method()
346 return None
347 else:

C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\profile_report.py in _repr_html_(self)
395 def _repr_html_(self):
396 """The ipython notebook widgets user interface gets called by the jupyter notebook."""
--> 397 self.to_notebook_iframe()
398
399 def __repr__(self):

C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\profile_report.py in to_notebook_iframe(self)
375 with warnings.catch_warnings():
376 warnings.simplefilter("ignore")
--> 377 display(get_notebook_iframe(self))
378
379 def to_widgets(self):

C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\report\presentation\flavours\widget\notebook.py in get_notebook_iframe(profile)
63 output = get_notebook_iframe_src(profile)
64 elif attribute == "srcdoc":
---> 65 output = get_notebook_iframe_srcdoc(profile)
66 else:
67 raise ValueError(

C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\report\presentation\flavours\widget\notebook.py in get_notebook_iframe_srcdoc(profile)
21 width = config["notebook"]["iframe"]["width"].get(str)
22 height = config["notebook"]["iframe"]["height"].get(str)
---> 23 src = html.escape(profile.to_html())
24
25 iframe = f'<iframe width="{width}" height="{height}" srcdoc="{src}" frameborder="0" allowfullscreen></iframe>'

C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\profile_report.py in to_html(self)
346
347 """
--> 348 return self.html
349
350 def to_json(self) -> str:

C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\profile_report.py in html(self)
166 def html(self):
167 if self._html is None:
--> 168 self._html = self._render_html()
169 return self._html
170

C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\profile_report.py in _render_html(self)
273 from pandas_profiling.report.presentation.flavours import HTMLReport
274
--> 275 report = self.report
276
277 disable_progress_bar = not config["progress_bar"].get(bool)

C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\profile_report.py in report(self)
160 def report(self):
161 if self._report is None:
--> 162 self._report = get_report_structure(self.description_set)
163 return self._report
164

C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\profile_report.py in description_set(self)
141 def description_set(self):
142 if self._description_set is None:
--> 143 self._description_set = describe_df(self.title, self.df)
144 return self._description_set
145

C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\model\describe.py in describe(title, df)
61 total=number_of_tasks, desc="Summarize dataset", disable=disable_progress_bar
62 ) as pbar:
---> 63 series_description = get_series_descriptions(df, pbar)
64
65 pbar.set_postfix_str("Get variable types")

C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\model\summary.py in get_series_descriptions(df, pbar)
470 # TODO: use Pool for Linux-based systems
471 with multiprocessing.pool.ThreadPool(pool_size) as executor:
--> 472 for i, (column, description) in enumerate(
473 executor.imap_unordered(multiprocess_1d, args)
474 ):

C:\ProgramData\Anaconda3\envs\data_analysis\lib\multiprocessing\pool.py in next(self, timeout)
866 if success:
867 return value
--> 868 raise value
869
870 __next__ = next # XXX

C:\ProgramData\Anaconda3\envs\data_analysis\lib\multiprocessing\pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
123 job, i, func, args, kwds = task
124 try:
--> 125 result = (True, func(*args, **kwds))
126 except Exception as e:
127 if wrap_exception and func is not _helper_reraises_exception:

C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\model\summary.py in multiprocess_1d(args)
448 """
449 column, series = args
--> 450 return column, describe_1d(series)
451
452 # Multiprocessing of Describe 1D for each column

C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\model\summary.py in describe_1d(series)
417 if series_description["type"] in type_to_func:
418 series_description.update(
--> 419 type_to_func[series_description["type"]](series, series_description)
420 )
421 else:

C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\model\summary.py in describe_date_1d(series, series_description)
230 )
231 if chi_squared_threshold > 0.0:
--> 232 histogram = np.histogram(
233 series[series.notna()].astype("int64").values, bins="auto"
234 )[0]

<__array_function__ internals> in histogram(*args, **kwargs)

C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\numpy\lib\histograms.py in histogram(a, bins, range, normed, weights, density)
854 # The index computation is not guaranteed to give exactly
855 # consistent results within ~1 ULP of the bin edges.
--> 856 decrement = tmp_a < bin_edges[indices]
857 indices[decrement] -= 1
858 # The last bin includes the right edge. The other bins do not.

IndexError: index -9223372036854775808 is out of bounds for axis 0 with size 2

bi2017dg · 2020-09-25T13:00:36Z

Hi, the error was caused by many of the data columns having no values; they were all nulls. As a result, an index and matrix could not be created. Thanks for responding to my emails. I will keep posting if I run into any other errors. The only problem I am facing now is a memory error due to lack of RAM. Any suggestions for when I have a billion rows coming from a database? (I am not using Vaex, as it works with big-data file formats rather than relational data read directly via pyodbc.) chunksize could help, so it would be great if anyone could show how to use chunksize properly. Thanks, Debashis

…

On Wed, Sep 23, 2020 at 11:05 AM Mike Lee ***@***.***> wrote: I got the same issue as shown below. How did you solve it? ------------------------------ IndexError Traceback (most recent call last) C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\IPython\core\formatters.py in *call*(self, obj) 343 method = get_real_method(obj, self.print_method) 344 if method is not None: --> 345 return method() 346 return None 347 else: C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\profile_report.py in *repr_html*(self) 395 def *repr_html*(self): 396 """The ipython notebook widgets user interface gets called by the jupyter notebook.""" --> 397 self.to_notebook_iframe() 398 399 def *repr*(self): C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\profile_report.py in to_notebook_iframe(self) 375 with warnings.catch_warnings(): 376 warnings.simplefilter("ignore") --> 377 display(get_notebook_iframe(self)) 378 379 def to_widgets(self): C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\report\presentation\flavours\widget\notebook.py in get_notebook_iframe(profile) 63 output = get_notebook_iframe_src(profile) 64 elif attribute == "srcdoc": ---> 65 output = get_notebook_iframe_srcdoc(profile) 66 else: 67 raise ValueError( C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\report\presentation\flavours\widget\notebook.py in get_notebook_iframe_srcdoc(profile) 21 width = config["notebook"]["iframe"]["width"].get(str) 22 height = config["notebook"]["iframe"]["height"].get(str) ---> 23 src = html.escape(profile.to_html()) 24 25 iframe = f'<iframe width="{width}" height="{height}" srcdoc="{src}" frameborder="0" allowfullscreen></iframe>' C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\profile_report.py in to_html(self) 346 347 """ --> 348 return self.html 349 350 def to_json(self) -> str: 
C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\profile_report.py in html(self) 166 def html(self): 167 if self._html is None: --> 168 self._html = self._render_html() 169 return self._html 170 C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\profile_report.py in _render_html(self) 273 from pandas_profiling.report.presentation.flavours import HTMLReport 274 --> 275 report = self.report 276 277 disable_progress_bar = not config["progress_bar"].get(bool) C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\profile_report.py in report(self) 160 def report(self): 161 if self._report is None: --> 162 self._report = get_report_structure(self.description_set) 163 return self._report 164 C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\profile_report.py in description_set(self) 141 def description_set(self): 142 if self._description_set is None: --> 143 self._description_set = describe_df(self.title, self.df) 144 return self._description_set 145 C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\model\describe.py in describe(title, df) 61 total=number_of_tasks, desc="Summarize dataset", disable=disable_progress_bar 62 ) as pbar: ---> 63 series_description = get_series_descriptions(df, pbar) 64 65 pbar.set_postfix_str("Get variable types") C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\model\summary.py in get_series_descriptions(df, pbar) 470 # TODO: use Pool for Linux-based systems 471 with multiprocessing.pool.ThreadPool(pool_size) as executor: --> 472 for i, (column, description) in enumerate( 473 executor.imap_unordered(multiprocess_1d, args) 474 ): C:\ProgramData\Anaconda3\envs\data_analysis\lib\multiprocessing\pool.py in next(self, timeout) 866 if success: 867 return value --> 868 raise value 869 870 *next* = next # XXX C:\ProgramData\Anaconda3\envs\data_analysis\lib\multiprocessing\pool.py in 
worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception) 123 job, i, func, args, kwds = task 124 try: --> 125 result = (True, func(*args, **kwds)) 126 except Exception as e: 127 if wrap_exception and func is not _helper_reraises_exception: C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\model\summary.py in multiprocess_1d(args) 448 """ 449 column, series = args --> 450 return column, describe_1d(series) 451 452 # Multiprocessing of Describe 1D for each column C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\model\summary.py in describe_1d(series) 417 if series_description["type"] in type_to_func: 418 series_description.update( --> 419 type_to_func[series_description["type"]](series, series_description) 420 ) 421 else: C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\pandas_profiling\model\summary.py in describe_date_1d(series, series_description) 230 ) 231 if chi_squared_threshold > 0.0: --> 232 histogram = np.histogram( 233 series[series.notna()].astype("int64").values, bins="auto" 234 )[0] <*array_function* internals> in histogram(*args, **kwargs) C:\ProgramData\Anaconda3\envs\data_analysis\lib\site-packages\numpy\lib\histograms.py in histogram(a, bins, range, normed, weights, density) 854 # The index computation is not guaranteed to give exactly 855 # consistent results within ~1 ULP of the bin edges. --> 856 decrement = tmp_a < bin_edges[indices] 857 indices[decrement] -= 1 858 # The last bin includes the right edge. The other bins do not. IndexError: index -9223372036854775808 is out of bounds for axis 0 with size 2 — You are receiving this because you authored the thread. Reply to this email directly, view it on GitHub <#560 (comment)>, or unsubscribe <https://github.com/notifications/unsubscribe-auth/AKEORMCDHGNZVVDNWNNT64TSHIMC7ANCNFSM4QOQITYQ> .

snehalvartak · 2020-11-11T05:21:52Z

@bi2017dg how was this resolved? I am facing the same issue.

zhoujianch · 2023-04-18T01:57:16Z

I got the same issue as shown below. How did you solve it?
pandas=1.5.0
ydata-profiling=4.1.1

In [2]: a
Out[2]:
{'COUNT_READ_ONLY': [5.0],
'EVENT_NAME': ['transaction'],
'AVG_TIMER_READ_ONLY': [438577000.0],
'SUM_TIMER_READ_ONLY': [2192887000.0],
'SUM_TIMER_READ_WRITE': [1.0950042604231e+16],
'MIN_TIMER_READ_ONLY': [104499000.0],
'MIN_TIMER_READ_WRITE': [8308000.0],
'SUM_TIMER_WAIT': [1.0950044797118e+16],
'AVG_TIMER_WAIT': [1860772000.0],
'COUNT_READ_WRITE': [5884671.0],
'MAX_TIMER_READ_ONLY': [836559000.0],
'MAX_TIMER_WAIT': [603689471745000.0],
'MAX_TIMER_READ_WRITE': [603689471745000.0],
'AVG_TIMER_READ_WRITE': [1860773000.0],
'COUNT_STAR': [5884676.0],
'MIN_TIMER_WAIT': [8308000.0]}

In [3]: import pandas as pd

In [4]: import numpy as np

In [5]: from ydata_profiling import ProfileReport

In [6]: table = pd.DataFrame.from_dict(a)

In [7]: profile_report = ProfileReport(
...: table,
...: progress_bar=False,
...: infer_dtypes=False,
...: missing_diagrams=None,
...: correlations=None,
...: interactions=None,
...: # duplicates=None,
...: samples=None)
In [8]: profile_report.get_description()
/root/anaconda3/envs/py3.7/lib/python3.7/site-packages/numpy/core/_methods.py:234: RuntimeWarning: Degrees of freedom <= 0 for slice
keepdims=keepdims)
/root/anaconda3/envs/py3.7/lib/python3.7/site-packages/numpy/lib/histograms.py:822: RuntimeWarning: divide by zero encountered in double_scalars
norm = n_equal_bins / _unsigned_subtract(last_edge, first_edge)
/root/anaconda3/envs/py3.7/lib/python3.7/site-packages/numpy/lib/histograms.py:850: RuntimeWarning: invalid value encountered in multiply
f_indices = _unsigned_subtract(tmp_a, first_edge) * norm
/root/anaconda3/envs/py3.7/lib/python3.7/site-packages/numpy/core/fromnumeric.py:3622: RuntimeWarning: Degrees of freedom <= 0 for slice
**kwargs)
/root/anaconda3/envs/py3.7/lib/python3.7/site-packages/numpy/core/_methods.py:226: RuntimeWarning: invalid value encountered in double_scalars
ret = ret.dtype.type(ret / rcount)

IndexError Traceback (most recent call last)
in
----> 1 profile_report.get_description()

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/typeguard/__init__.py in wrapper(*args, **kwargs)
1031 memo = _CallMemo(python_func, _localns, args=args, kwargs=kwargs)
1032 check_argument_types(memo)
-> 1033 retval = func(*args, **kwargs)
1034 try:
1035 check_return_type(retval, memo)

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/profile_report.py in get_description(self)
315 Dict containing a description for each variable in the DataFrame.
316 """
--> 317 return self.description_set
318
319 def get_rejected_variables(self) -> set:

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/typeguard/__init__.py in wrapper(*args, **kwargs)
1031 memo = _CallMemo(python_func, _localns, args=args, kwargs=kwargs)
1032 check_argument_types(memo)
-> 1033 retval = func(*args, **kwargs)
1034 try:
1035 check_return_type(retval, memo)

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/profile_report.py in description_set(self)
251 self.summarizer,
252 self.typeset,
--> 253 self._sample,
254 )
255 return self._description_set

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/describe.py in describe(config, df, summarizer, typeset, sample)
70 pbar.total += len(df.columns)
71 series_description = get_series_descriptions(
---> 72 config, df, summarizer, typeset, pbar
73 )
74

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/multimethod/__init__.py in __call__(self, *args, **kwargs)
313 func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))]
314 try:
--> 315 return func(*args, **kwargs)
316 except TypeError as ex:
317 raise DispatchError(f"Function {func.__code__}") from ex

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/pandas/summary_pandas.py in pandas_get_series_descriptions(config, df, summarizer, typeset, pbar)
98 with multiprocessing.pool.ThreadPool(pool_size) as executor:
99 for i, (column, description) in enumerate(
--> 100 executor.imap_unordered(multiprocess_1d, args)
101 ):
102 pbar.set_postfix_str(f"Describe variable:{column}")

~/anaconda3/envs/py3.7/lib/python3.7/multiprocessing/pool.py in next(self, timeout)
746 if success:
747 return value
--> 748 raise value
749
750 __next__ = next # XXX

~/anaconda3/envs/py3.7/lib/python3.7/multiprocessing/pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
119 job, i, func, args, kwds = task
120 try:
--> 121 result = (True, func(*args, **kwds))
122 except Exception as e:
123 if wrap_exception and func is not _helper_reraises_exception:

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/pandas/summary_pandas.py in multiprocess_1d(args)
77 """
78 column, series = args
---> 79 return column, describe_1d(config, series, summarizer, typeset)
80
81 pool_size = config.pool_size

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/multimethod/__init__.py in __call__(self, *args, **kwargs)
313 func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))]
314 try:
--> 315 return func(*args, **kwargs)
316 except TypeError as ex:
317 raise DispatchError(f"Function {func.__code__}") from ex

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/pandas/summary_pandas.py in pandas_describe_1d(config, series, summarizer, typeset)
55
56 typeset.type_schema[series.name] = vtype
---> 57 return summarizer.summarize(config, series, dtype=vtype)
58
59

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/summarizer.py in summarize(self, config, series, dtype)
37 object:
38 """
---> 39 _, _, summary = self.handle(str(dtype), config, series, {"type": str(dtype)})
40 return summary
41

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/handler.py in handle(self, dtype, *args, **kwargs)
60 funcs = self.mapping.get(dtype, [])
61 op = compose(funcs)
---> 62 return op(*args)
63
64

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/handler.py in func2(*x)
19 return f(*x)
20 else:
---> 21 return f(*res)
22
23 return func2

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/handler.py in func2(*x)
15 def func(f: Callable, g: Callable) -> Callable:
16 def func2(*x) -> Any:
---> 17 res = g(*x)
18 if type(res) == bool:
19 return f(*x)

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/multimethod/__init__.py in __call__(self, *args, **kwargs)
313 func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))]
314 try:
--> 315 return func(*args, **kwargs)
316 except TypeError as ex:
317 raise DispatchError(f"Function {func.__code__}") from ex

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/summary_algorithms.py in inner(config, series, summary)
63 if not summary["hashable"]:
64 return config, series, summary
---> 65 return fn(config, series, summary)
66
67 return inner

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/summary_algorithms.py in inner(config, series, summary)
80 series = series.dropna()
81
---> 82 return fn(config, series, summary)
83
84 return inner

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/pandas/describe_numeric_pandas.py in pandas_describe_numeric_1d(config, series, summary)
118
119 if chi_squared_threshold > 0.0:
--> 120 stats["chi_squared"] = chi_square(finite_values)
121
122 stats["range"] = stats["max"] - stats["min"]

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/summary_algorithms.py in chi_square(values, histogram)
50 ) -> dict:
51 if histogram is None:
---> 52 histogram, _ = np.histogram(values, bins="auto")
53 return dict(chisquare(histogram)._asdict())
54

<__array_function__ internals> in histogram(*args, **kwargs)

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/numpy/lib/histograms.py in histogram(a, bins, range, normed, weights, density)
854 # The index computation is not guaranteed to give exactly
855 # consistent results within ~1 ULP of the bin edges.
--> 856 decrement = tmp_a < bin_edges[indices]
857 indices[decrement] -= 1
858 # The last bin includes the right edge. The other bins do not.

IndexError: index -9223372036854775808 is out of bounds for axis 0 with size 2

Can it not handle the float32 data type?

sbrugman added the information requested ❔ Cannot reproduce, waiting for minimum reproduction details. label Sep 2, 2020

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Pandas_profiling still gives index error even I reduce the dataframe size #560

Pandas_profiling still gives index error even I reduce the dataframe size #560

bi2017dg commented Aug 28, 2020

sbrugman commented Sep 2, 2020

mike11339 commented Sep 23, 2020

bi2017dg commented Sep 25, 2020 via email

snehalvartak commented Nov 11, 2020

zhoujianch commented Apr 18, 2023 •

edited

Loading

Pandas_profiling still gives index error even I reduce the dataframe size #560

Pandas_profiling still gives index error even I reduce the dataframe size #560

Comments

bi2017dg commented Aug 28, 2020

sbrugman commented Sep 2, 2020

mike11339 commented Sep 23, 2020

bi2017dg commented Sep 25, 2020 via email

snehalvartak commented Nov 11, 2020

zhoujianch commented Apr 18, 2023 • edited Loading

zhoujianch commented Apr 18, 2023 •

edited

Loading