Skip to content

Commit

Permalink
CHANGES:
Browse files Browse the repository at this point in the history
core:
* When creating a masked array with named fields, the mask now has a flexible type [(n,bool) for n in fields], which allows individual fields to be masked.
* When a masked array has named fields, setting the mask to a sequence of booleans will set the mask of all the fields of the corresponding record.
* A new property, recordmask, returns either the mask (when no named fields) or a boolean array where values are True if all the fields of one record are masked, False otherwise.
* A new private attribute, _isfield, has been introduced to keep track of whether an array is a field of a record-like masked array or not, and to make sure that the mask is properly propagated.
* Setting an existing mask to nomask will only fill the mask with False, not transform it to nomask

mrecords:
* _fieldmask is now only a synonym for _mask, kept for convenience
* revamped __getattribute__ to the example of numpy.core.records.recarray.__getattribute__
* __setslice__ and filled are now inherited from MaskedArray

tests
* The tests in test_core have been reorganized to improve clarity and avoid duplication.
* test_extras now uses the convention "import numpy as np"
  • Loading branch information
pierregm committed Jun 8, 2008
1 parent 3cd9e03 commit c0bdc3a
Show file tree
Hide file tree
Showing 5 changed files with 1,737 additions and 1,410 deletions.
222 changes: 159 additions & 63 deletions numpy/ma/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1196,7 +1196,7 @@ def __new__(cls, data=None, mask=nomask, dtype=None, copy=False,
DeprecationWarning)
shrink = flag
# Process data............
_data = narray(data, dtype=dtype, copy=copy, subok=True, ndmin=ndmin)
_data = np.array(data, dtype=dtype, copy=copy, subok=True, ndmin=ndmin)
_baseclass = getattr(data, '_baseclass', type(_data))
_basedict = getattr(data, '_basedict', getattr(data, '__dict__', {}))
if not isinstance(data, MaskedArray) or not subok:
Expand All @@ -1207,7 +1207,15 @@ def __new__(cls, data=None, mask=nomask, dtype=None, copy=False,
if hasattr(data,'_mask') and not isinstance(data, ndarray):
_data._mask = data._mask
_sharedmask = True
# Process mask ...........
# Process mask ...............................
# Number of named fields (or zero if none)
names_ = _data.dtype.names or ()
# Type of the mask
if names_:
mdtype = [(n, MaskType) for n in names_]
else:
mdtype = MaskType
# Case 1. : no mask in input ............
if mask is nomask:
# Erase the current mask ?
if not keep_mask:
Expand All @@ -1216,15 +1224,22 @@ def __new__(cls, data=None, mask=nomask, dtype=None, copy=False,
_data._mask = nomask
# With full version
else:
_data._mask = np.zeros(_data.shape, dtype=MaskType)
_data._mask = np.zeros(_data.shape, dtype=mdtype)
if copy:
_data._mask = _data._mask.copy()
_data._sharedmask = False
else:
_data._sharedmask = True
# Case 2. : With a mask in input ........
else:
mask = np.array(mask, dtype=MaskType, copy=copy)
# Read the mask with the current mdtype
try:
mask = np.array(mask, copy=copy, dtype=mdtype)
# Or assume it's a sequence of bool/int
except TypeError:
mask = np.array([tuple([m]*len(mdtype)) for m in mask],
dtype=mdtype)
# Make sure the mask and the data have the same shape
if mask.shape != _data.shape:
(nd, nm) = (_data.size, mask.size)
if nm == 1:
Expand All @@ -1245,7 +1260,11 @@ def __new__(cls, data=None, mask=nomask, dtype=None, copy=False,
_data._mask = mask
_data._sharedmask = not copy
else:
_data._mask = umath.logical_or(mask, _data._mask)
if names_:
for n in names_:
_data._mask[n] |= mask[n]
else:
_data._mask = np.logical_or(mask, _data._mask)
_data._sharedmask = False
# Update fill_value.......
if fill_value is None:
Expand All @@ -1270,6 +1289,7 @@ def _update_from(self, obj):
_dict = dict(_fill_value=getattr(obj, '_fill_value', None),
_hardmask=getattr(obj, '_hardmask', False),
_sharedmask=getattr(obj, '_sharedmask', False),
_isfield=getattr(obj, '_isfield', False),
_baseclass=getattr(obj,'_baseclass', _baseclass),
_basedict=_basedict,)
self.__dict__.update(_dict)
Expand Down Expand Up @@ -1379,12 +1399,11 @@ def __getitem__(self, indx):
if isinstance(indx, basestring):
if self._fill_value is not None:
dout._fill_value = self._fill_value[indx]
dout._isfield = True
# Update the mask if needed
if _mask is not nomask:
if isinstance(indx, basestring):
dout._mask = _mask.reshape(dout.shape)
else:
dout._mask = ndarray.__getitem__(_mask, indx).reshape(dout.shape)
dout._mask = _mask[indx]
dout._sharedmask = True
# Note: Don't try to check for m.any(), that'll take too long...
return dout
#........................
Expand All @@ -1402,47 +1421,64 @@ def __setitem__(self, indx, value):
# msg = "Masked arrays must be filled before they can be used as indices!"
# raise IndexError, msg
if isinstance(indx, basestring):
ndarray.__setitem__(self._data, indx, getdata(value))
warnings.warn("MaskedArray.__setitem__ on fields: "\
"The mask is NOT affected!")
ndarray.__setitem__(self._data, indx, value)
ndarray.__setitem__(self._mask, indx, getmask(value))
return
#....
#........................................
# ndgetattr = ndarray.__getattribute__
_names = ndarray.__getattribute__(self,'dtype').names or ()
_data = self._data
_mask = ndarray.__getattribute__(self,'_mask')
#........................................
if value is masked:
m = self._mask
if m is nomask:
m = np.zeros(self.shape, dtype=MaskType)
m[indx] = True
self._mask = m
self._sharedmask = False
# The mask wasn't set: create a full version...
if _mask is nomask:
_mask = self._mask = make_mask_none(self.shape, _names)
# Now, set the mask to its value.
if _names:
_mask[indx] = tuple([True,] * len(_names))
else:
_mask[indx] = True
if not self._isfield:
self._sharedmask = False
return
#....
# dval = np.array(value, copy=False, dtype=self.dtype)
#........................................
# Get the _data part of the new value
dval = value
mval = getmask(value)
if self._mask is nomask:
# Get the _mask part of the new value
mval = getattr(value, '_mask', nomask)
if _names and mval is nomask:
mval = tuple([False] * len(_names))
if _mask is nomask:
# Set the data, then the mask
ndarray.__setitem__(self._data,indx,dval)
ndarray.__setitem__(_data, indx, dval)
if mval is not nomask:
self._mask = np.zeros(self.shape, dtype=MaskType)
self._mask[indx] = mval
_mask = self._mask = make_mask_none(self.shape, _names)
ndarray.__setitem__(_mask, indx, mval)
elif not self._hardmask:
# Unshare the mask if necessary to avoid propagation
self.unshare_mask()
if not self._isfield:
self.unshare_mask()
_mask = ndarray.__getattribute__(self,'_mask')
# Set the data, then the mask
ndarray.__setitem__(self._data, indx, dval)
ndarray.__setitem__(self._mask, indx, mval)
elif hasattr(indx, 'dtype') and (indx.dtype==bool_):
indx = indx * umath.logical_not(self._mask)
ndarray.__setitem__(self._data, indx, dval)
ndarray.__setitem__(_data, indx, dval)
ndarray.__setitem__(_mask, indx, mval)
elif hasattr(indx, 'dtype') and (indx.dtype==MaskType):
indx = indx * umath.logical_not(_mask)
ndarray.__setitem__(_data,indx,dval)
else:
mindx = mask_or(self._mask[indx], mval, copy=True)
if _names:
err_msg = "Flexible 'hard' masks are not yet supported..."
raise NotImplementedError(err_msg)
mindx = mask_or(_mask[indx], mval, copy=True)
dindx = self._data[indx]
if dindx.size > 1:
dindx[~mindx] = dval
elif mindx is nomask:
dindx = dval
ndarray.__setitem__(self._data, indx, dindx)
self._mask[indx] = mindx
ndarray.__setitem__(_data, indx, dindx)
_mask[indx] = mindx
return
#............................................
def __getslice__(self, i, j):
"""x.__getslice__(i, j) <==> x[i:j]
Expand All @@ -1466,28 +1502,57 @@ def __setmask__(self, mask, copy=False):
"""Set the mask.
"""
if mask is not nomask:
mask = narray(mask, copy=copy, dtype=MaskType)
# We could try to check whether shrinking is needed..
# ... but we would waste some precious time
# if self._shrinkmask and not mask.any():
# mask = nomask
if self._mask is nomask:
self._mask = mask
elif self._hardmask:
if mask is not nomask:
self._mask.__ior__(mask)
else:
# This one is tricky: if we set the mask that way, we may break the
# propagation. But if we don't, we end up with a mask full of False
# and a test on nomask fails...
names = ndarray.__getattribute__(self,'dtype').names
current_mask = ndarray.__getattribute__(self,'_mask')
if mask is masked:
mask = True
# Make sure the mask is set
if (current_mask is nomask):
# Just don't do anything if there's nothing to do...
if mask is nomask:
self._mask = nomask
return
current_mask = self._mask = make_mask_none(self.shape, names)
# No named fields.........
if names is None:
# Hardmask: don't unmask the data
if self._hardmask:
current_mask |= mask
# Softmask: set everything to False
else:
self.unshare_mask()
self._mask.flat = mask
if self._mask.shape:
self._mask = np.reshape(self._mask, self.shape)
current_mask.flat = mask
# Named fields w/ ............
else:
mdtype = current_mask.dtype
mask = np.array(mask, copy=False)
# Mask is a singleton
if not mask.ndim:
# It's a boolean : make a record
if mask.dtype.kind == 'b':
mask = np.array(tuple([mask.item()]*len(mdtype)),
dtype=mdtype)
# It's a record: make sure the dtype is correct
else:
mask = mask.astype(mdtype)
# Mask is a sequence
else:
# Make sure the new mask is a ndarray with the proper dtype
try:
mask = np.array(mask, copy=copy, dtype=mdtype)
# Or assume it's a sequence of bool/int
except TypeError:
mask = np.array([tuple([m]*len(mdtype)) for m in mask],
dtype=mdtype)
# Hardmask: don't unmask the data
if self._hardmask:
for n in names:
current_mask[n] |= mask[n]
# Softmask: set everything to False
else:
current_mask.flat = mask
# Reshape if needed
if current_mask.shape:
current_mask.shape = self.shape
return
_set_mask = __setmask__
#....
def _get_mask(self):
Expand All @@ -1498,6 +1563,26 @@ def _get_mask(self):
# return self._mask.reshape(self.shape)
return self._mask
mask = property(fget=_get_mask, fset=__setmask__, doc="Mask")
#
def _getrecordmask(self):
    """Return the mask of the records.

    A record is masked when all of its fields are masked.  When the array
    has no named fields, the regular mask is returned unchanged.
    """
    if self.dtype.names is None:
        return self._mask
    # Viewing the flexible mask as (bool_, nfields) appends one trailing axis
    # that holds the per-field flags of each record.
    fieldflags = self._mask.view((bool_, len(self.dtype)))
    if self.size > 1:
        # Reduce over the trailing (fields) axis.  A hard-coded axis 1 is the
        # fields axis only for 1-D arrays; for n-D arrays it would collapse
        # a data axis instead.
        return fieldflags.all(-1)
    # Zero or one record: collapse everything to a single boolean.
    return fieldflags.all()

def _setrecordmask(self):
    """Set the mask of the records (not implemented yet).

    Always raises NotImplementedError: setting the mask record by record
    is not supported.
    """
    raise NotImplementedError("Coming soon: setting the mask per records!")
# Read-only property: no setter is wired in.
recordmask = property(fget=_getrecordmask)
#............................................
def harden_mask(self):
"""Force the mask to hard.
Expand Down Expand Up @@ -1602,14 +1687,22 @@ def filled(self, fill_value=None):
"""
m = self._mask
if m is nomask or not m.any():
if m is nomask:
return self._data
#
if fill_value is None:
fill_value = self.fill_value
#
if self is masked_singleton:
result = np.asanyarray(fill_value)
return np.asanyarray(fill_value)
#
if len(self.dtype):
result = self._data.copy()
for n in result.dtype.names:
field = result[n]
np.putmask(field, self._mask[n], self.fill_value[n])
elif not m.any():
return self._data
else:
result = self._data.copy()
try:
Expand Down Expand Up @@ -1682,11 +1775,14 @@ def __str__(self):
else:
return str(self._data)
# convert to object array to make filled work
#!!!: the two lines below seem more robust than the self._data.astype
# res = numeric.empty(self._data.shape, object_)
# numeric.putmask(res,~m,self._data)
res = self._data.astype("|O8")
res[m] = f
names = self.dtype.names
if names is None:
res = self._data.astype("|O8")
res[m] = f
else:
res = self._data.astype([(n,'|O8') for n in names])
for field in names:
np.putmask(res[field], m[field], f)
else:
res = self.filled(self.fill_value)
return str(res)
Expand Down Expand Up @@ -3399,7 +3495,7 @@ def putmask(a, mask, values): #, mode='raise'):
if getmask(a) is nomask:
if valmask is not nomask:
a._sharedmask = True
a.mask = np.zeros(a.shape, dtype=bool_)
a._mask = make_mask_none(a.shape, a.dtype.names)
np.putmask(a._mask, mask, valmask)
elif a._hardmask:
if valmask is not nomask:
Expand Down
Loading

0 comments on commit c0bdc3a

Please sign in to comment.