Improve manipulation of elites by modifying as_pandas (icaros-usc#158)
- Remove old data attributes
- Remove references to old data attributes
- Implement ArchiveDataFrame and new as_pandas
- Test ArchiveDataFrame (as_pandas tests do not need to change)
- Fix tutorials
- Update docstring for as_pandas
btjanaka authored Jul 16, 2021
1 parent 51cc071 commit 121bcc9
Showing 10 changed files with 344 additions and 204 deletions.
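For orientation, the sketch below shows the workflow this commit enables: `as_pandas()` now returns an `ArchiveDataFrame` whose batch accessors and `iterelites()` replace the removed `archive.solutions`, `archive.objective_values`, and related attributes. The archive construction, dimensions, and random solutions are illustrative only (direct `initialize()`/`add()` calls stand in for the usual optimizer loop) and are not part of the diff.

```python
import numpy as np

from ribs.archives import GridArchive

# Illustrative archive setup (not part of this commit).
archive = GridArchive([10, 10], [(-1, 1), (-1, 1)])
archive.initialize(3)  # solution_dim = 3
for _ in range(100):
    sol = np.random.uniform(-1, 1, 3)
    archive.add(sol, sol.sum(), sol[:2])  # solution, objective, behavior values

# as_pandas() now returns an ArchiveDataFrame (a pandas.DataFrame subclass).
df = archive.as_pandas()

# Batch accessors replace the removed data attributes on the archive itself.
objectives = df.batch_objectives()  # one entry per elite
solutions = df.batch_solutions()    # one row per elite

# iterelites() yields one Elite per row, e.g. for post-hoc evaluation.
for elite in df.iterelites():
    print(elite.sol)
```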
49 changes: 33 additions & 16 deletions docs/_templates/autosummary/class.rst
@@ -10,25 +10,38 @@
.. currentmodule:: {{ module }}

.. autoclass:: {{ objname }}
{%- if name == "ArchiveDataFrame" %}
:no-inherited-members:
:members:
{% endif %}

{% block methods %}

{% if methods %}
.. rubric:: {{ _('Methods') }}

.. autosummary::
{% for item in all_methods %}
{%- if not item.startswith('_') or item in ['__len__',
'__call__',
'__next__',
'__iter__',
'__getitem__',
'__setitem__',
'__delitem__',
] %}
~{{ name }}.{{ item }}
{%- endif -%}
{%- endfor %}
{% if name == "ArchiveDataFrame" %}
~{{ name }}.batch_behaviors
~{{ name }}.batch_indices
~{{ name }}.batch_metadata
~{{ name }}.batch_objectives
~{{ name }}.batch_solutions
~{{ name }}.iterelites
{% else %}
{% for item in all_methods %}
{%- if not item.startswith('_') or item in ['__len__',
'__call__',
'__next__',
'__iter__',
'__getitem__',
'__setitem__',
'__delitem__',
] %}
~{{ name }}.{{ item }}
{%- endif -%}
{%- endfor %}
{% endif %}

{% endif %}
{% endblock %}

@@ -37,8 +50,12 @@
.. rubric:: {{ _('Attributes') }}

.. autosummary::
{% for item in attributes %}
~{{ name }}.{{ item }}
{%- endfor %}
{% if name == "ArchiveDataFrame" %}
{% else %}
{% for item in attributes %}
~{{ name }}.{{ item }}
{%- endfor %}
{% endif %}

{% endif %}
{% endblock %}
4 changes: 3 additions & 1 deletion examples/tutorials/lsi_mnist.ipynb
@@ -480,9 +480,11 @@
"\n",
" imgs = []\n",
" img_size = (28, 28)\n",
" df = archive.as_pandas()\n",
" solutions, indices = df.batch_solutions(), df.batch_indices()\n",
" for index in grid_indices:\n",
" try:\n",
" sol = archive.solutions[archive.indices.index(index)]\n",
" sol = solutions[indices.index(index)]\n",
" except ValueError:\n",
" print(f\"There is no solution at index {index}.\")\n",
" return\n",
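For reference, the lookup pattern introduced in this cell amounts to the following; `archive` is assumed to be the populated GridArchive from the tutorial, and the specific grid index is hypothetical.

```python
df = archive.as_pandas()
solutions, indices = df.batch_solutions(), df.batch_indices()

# batch_indices() returns a sequence of archive indices (tuples for a
# GridArchive), so .index() locates the row for a particular grid cell and
# raises ValueError when that cell is empty.
grid_index = (5, 7)  # hypothetical cell
try:
    sol = solutions[indices.index(grid_index)]
except ValueError:
    print(f"There is no solution at index {grid_index}.")
```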
9 changes: 5 additions & 4 deletions examples/tutorials/lunar_lander.ipynb
@@ -603,7 +603,8 @@
"metadata": {},
"outputs": [],
"source": [
"high_perf_sols = archive.solutions[archive.objective_values > 200]"
"df = archive.as_pandas()\n",
"high_perf_sols = df.query(\"objective > 200\").sort_values(\"objective\", ascending=False)"
]
},
{
@@ -733,8 +734,8 @@
],
"source": [
"if len(high_perf_sols) > 0:\n",
" for sol in high_perf_sols[[0, len(high_perf_sols) // 2, -1]]:\n",
" display_video(sol)"
" for elite in high_perf_sols.loc[[0, len(high_perf_sols) // 2, -1]].iterelites():\n",
" display_video(elite.sol)"
]
},
{
@@ -801,7 +802,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.8"
"version": "3.7.3"
}
},
"nbformat": 4,
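Putting the lunar lander changes above together, the selection-and-replay pattern now looks roughly like this; `archive` and `display_video` are assumed to come from earlier tutorial cells, and positional indexing (`iloc`) picks the best, median, and worst of the elites above the threshold.

```python
df = archive.as_pandas()

# Ordinary pandas operations filter and rank the elites...
high_perf_sols = df.query("objective > 200").sort_values("objective", ascending=False)

# ...and iterelites() converts selected rows back into Elite objects, whose
# solution vector is available as elite.sol.
if len(high_perf_sols) > 0:
    rows = [0, len(high_perf_sols) // 2, -1]  # best, median, worst above 200
    for elite in high_perf_sols.iloc[rows].iterelites():
        display_video(elite.sol)
```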
3 changes: 3 additions & 0 deletions ribs/archives/__init__.py
@@ -13,10 +13,12 @@
ribs.archives.ArchiveBase
ribs.archives.AddStatus
ribs.archives.Elite
ribs.archives.ArchiveDataFrame
ribs.archives.ArchiveStats
"""
from ribs.archives._add_status import AddStatus
from ribs.archives._archive_base import ArchiveBase
from ribs.archives._archive_data_frame import ArchiveDataFrame
from ribs.archives._archive_stats import ArchiveStats
from ribs.archives._cvt_archive import CVTArchive
from ribs.archives._elite import Elite
@@ -30,5 +32,6 @@
"ArchiveBase",
"AddStatus",
"Elite",
"ArchiveDataFrame",
"ArchiveStats",
]
154 changes: 28 additions & 126 deletions ribs/archives/_archive_base.py
@@ -4,10 +4,10 @@

import numba as nb
import numpy as np
import pandas as pd
from decorator import decorator

from ribs.archives._add_status import AddStatus
from ribs.archives._archive_data_frame import ArchiveDataFrame
from ribs.archives._archive_stats import ArchiveStats
from ribs.archives._elite import Elite

@@ -75,31 +75,6 @@ def get(self, max_val):
return val


class CachedView:
"""Maintains a readonly view of the given numpy array.
Whenever the state changes in update(), the view is updated.
This class is useful when returning the archive data, e.g.
archive.solutions. If the archive has many indices, indexing into the array
can be expensive (e.g. ~0.5 seconds for 250k indices), and it adds up if the
user does this many times, so we only want to do the indexing once.
"""

def __init__(self, array):
self.array = array
self.view = None
self.state = None

def update(self, indices, state):
"""Sets view to array[indices], but only if state has changed."""
if state != self.state:
self.state = state.copy()
self.view = self.array[indices]
self.view.flags.writeable = False
return self.view


class ArchiveIterator:
"""An iterator for an archive's elites."""

@@ -163,13 +138,6 @@ class ArchiveBase(ABC): # pylint: disable = too-many-instance-attributes
| ``_metadata`` | ``(*storage_dims)`` |
+------------------------+------------------------------------+
.. note::
These arrays are different from the elite data attributes
:attr:`solutions`, :attr:`objective_values`, :attr:`behavior_values`,
and :attr:`metadata`. The attributes provide access to data about elites
in the archive via a view into these arrays.
All of these arrays are accessed via a common index. If we have index ``i``,
we access its solution at ``_solutions[i]``, its behavior values at
``_behavior_values[i]``, etc.
@@ -254,12 +222,6 @@ def __init__(self, storage_dims, behavior_dim, seed=None, dtype=np.float64):
self._bins = np.product(self._storage_dims)
self._stats = None

# Array views for providing access to data.
self._solutions_view = None
self._objective_values_view = None
self._behavior_values_view = None
self._metadata_view = None

# Tracks archive modifications by counting calls to clear() and add().
self._state = None

@@ -327,73 +289,6 @@ def dtype(self):
values."""
return self._dtype

## Data attributes ##

@property
@require_init
def solutions(self):
"""((len(archive), solution_dim) numpy.ndarray): Solutions of all elites
currently in the archive."""
return self._solutions_view.update(self._occupied_indices_cols,
self._state)

@property
@require_init
def objective_values(self):
"""(len(archive),) numpy.ndarray): Objective values of all elites
currently in the archive.
These correspond to :attr:`solutions`, e.g. ``objective_values[0]``
corresponds to ``solutions[0]``.
"""
return self._objective_values_view.update(self._occupied_indices_cols,
self._state)

@property
@require_init
def behavior_values(self):
"""(len(archive), behavior_dim) numpy.ndarray): Behavior values of all
elites currently in the archive.
These correspond to :attr:`solutions`, e.g. ``behavior_values[0]``
corresponds to ``solutions[0]``.
"""
return self._behavior_values_view.update(self._occupied_indices_cols,
self._state)

@property
@require_init
def indices(self):
"""(len(archive),) tuple: Tuple with indices of all elites in the
archive.
Each entry in the tuple is an index, which can be either an int or tuple
of int (see :meth:`get_index` for the specific archive for more info).
These correspond to :attr:`solutions`, e.g. ``indices[0]`` corresponds
to ``solutions[0]``.
This is a tuple instead of a numpy array because numpy arrays are unable
to (easily) store tuples directly.
"""
return tuple(self._occupied_indices) # List to tuple is cheap.

@property
@require_init
def metadata(self):
"""(len(archive),) numpy.ndarray): Metadata of all elites currently in
the archive.
This array is an object array.
These correspond to :attr:`solutions`, e.g. ``metadata[0]`` corresponds
to ``solutions[0]``.
"""
return self._metadata_view.update(self._occupied_indices_cols,
self._state)

## Methods ##

def __len__(self):
"""Number of elites in the archive."""
require_init_inline(self)
@@ -463,13 +358,8 @@ def initialize(self, solution_dim):
self._occupied_indices_cols = tuple(
[] for _ in range(len(self._storage_dims)))

self._solutions_view = CachedView(self._solutions)
self._objective_values_view = CachedView(self._objective_values)
self._behavior_values_view = CachedView(self._behavior_values)
self._metadata_view = CachedView(self._metadata)
self._state = {"clear": 0, "add": 0}

self._stats_reset()
self._state = {"clear": 0, "add": 0}

@require_init
def clear(self):
@@ -691,19 +581,21 @@ def get_random_elite(self):
)

def as_pandas(self, include_solutions=True, include_metadata=False):
"""Converts the archive into a Pandas dataframe.
"""Converts the archive into an :class:`ArchiveDataFrame` (a child class
of :class:`pandas.DataFrame`).
This base class implementation creates a dataframe consisting of:
The implementation of this method in :class:`ArchiveBase` creates a
dataframe consisting of:
- ``len(self._storage_dims)`` columns for the index, named
``index_0, index_1, ...`` In :class:`~ribs.archives.GridArchive` and
:class:`~ribs.archives.SlidingBoundariesArchive`, there are
:attr:`behavior_dim` columns. In :class:`~ribs.archives.CVTArchive`,
there is just one column. See :meth:`get_index` for more info.
- ``self._behavior_dim`` columns for the behavior characteristics, named
- :attr:`behavior_dim` columns for the behavior characteristics, named
``behavior_0, behavior_1, ...``
- 1 column for the objective values, named ``objective``
- ``solution_dim`` columns for the solution vectors, named
- :attr:`solution_dim` columns for the solution vectors, named
``solution_0, solution_1, ...``
- 1 column for the metadata objects, named ``metadata``
@@ -715,31 +607,41 @@ def as_pandas(self, include_solutions=True, include_metadata=False):
| | ... | | ... | | | ... | |
+---------+------+-------------+------+------------+-------------+-----+----------+
Compared to :class:`pandas.DataFrame`, the :class:`ArchiveDataFrame`
adds methods and attributes which make it easier to manipulate archive
data. For more information, refer to the :class:`ArchiveDataFrame`
documentation.
Args:
include_solutions (bool): Whether to include solution columns.
include_metadata (bool): Whether to include the metadata column.
Note that methods like :meth:`~pandas.DataFrame.to_csv` may not
properly save the dataframe since the metadata objects may not
be representable in a CSV.
Returns:
pandas.DataFrame: See above.
ArchiveDataFrame: See above.
""" # pylint: disable = line-too-long
data = OrderedDict()
indices = self._occupied_indices_cols

index_dim = len(self._storage_dims)
for i in range(index_dim):
data[f"index_{i}"] = np.asarray(self._occupied_indices_cols[i],
dtype=int)
for i, col in enumerate(indices):
data[f"index_{i}"] = np.asarray(col, dtype=int)

behavior_values = self._behavior_values[indices]
for i in range(self._behavior_dim):
data[f"behavior_{i}"] = self.behavior_values[:, i]
data[f"behavior_{i}"] = behavior_values[:, i]

data["objective"] = self.objective_values
data["objective"] = self._objective_values[indices]

if include_solutions:
solutions = self._solutions[indices]
for i in range(self._solution_dim):
data[f"solution_{i}"] = self.solutions[:, i]
data[f"solution_{i}"] = solutions[:, i]

if include_metadata:
data["metadata"] = self.metadata
return pd.DataFrame(data)
data["metadata"] = self._metadata[indices]

return ArchiveDataFrame(
data,
copy=False, # Fancy indexing above already results in copying.
)
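To make the column layout documented in the new `as_pandas` docstring concrete, here is a small sketch; the archive setup and values are illustrative, and the expected column names follow from the docstring (index, behavior, objective, solution, and optionally metadata columns).

```python
from ribs.archives import GridArchive

# Illustrative 2D grid archive with 3-dimensional solutions.
archive = GridArchive([20, 20], [(-1, 1), (-1, 1)])
archive.initialize(3)
archive.add([0.1, 0.2, 0.3], 1.5, [0.0, 0.0])

df = archive.as_pandas()
print(df.columns.tolist())
# Expected, per the docstring:
# ['index_0', 'index_1', 'behavior_0', 'behavior_1', 'objective',
#  'solution_0', 'solution_1', 'solution_2']

# include_metadata=True appends a 'metadata' object column; as the docstring
# notes, such a frame may not survive a round trip through to_csv().
df_meta = archive.as_pandas(include_metadata=True)
```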
(The remaining 5 changed files in this commit are not shown here.)
