Skip to content

Commit

Permalink
BUG: core/umath: fix cornercase for overlapping operands with interna…
Browse files Browse the repository at this point in the history
…l overlap

Ensure that a copy is made when ufunc output operand overlaps with ufunc
input, and both have internal overlap. The heuristic applied when
iteration order is elementwise is OK only in the absence of internal
overlap.
  • Loading branch information
pv committed Jan 21, 2017
1 parent 734d5f6 commit f7381b0
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 21 deletions.
7 changes: 6 additions & 1 deletion numpy/core/src/multiarray/nditer_constr.c
Original file line number Diff line number Diff line change
Expand Up @@ -2749,6 +2749,9 @@ npyiter_allocate_arrays(NpyIter *iter,
* If the arrays are views to exactly the same data, no need
* to make copies, if the caller (eg ufunc) says it accesses
* data only in the iterator order.
*
* However, if there is internal overlap (e.g. a zero stride on
* a non-unit dimension), a copy cannot be avoided.
*/
if ((op_flags[iop] & NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE) &&
(op_flags[iother] & NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE) &&
Expand All @@ -2760,7 +2763,9 @@ npyiter_allocate_arrays(NpyIter *iter,
PyArray_CompareLists(PyArray_STRIDES(op[iop]),
PyArray_STRIDES(op[iother]),
PyArray_NDIM(op[iop])) &&
PyArray_DESCR(op[iop]) == PyArray_DESCR(op[iother])) {
PyArray_DESCR(op[iop]) == PyArray_DESCR(op[iother]) &&
solve_may_have_internal_overlap(op[iop], 1) == 0) {

continue;
}

Expand Down
58 changes: 40 additions & 18 deletions numpy/core/tests/test_mem_overlap.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,21 @@ def random_slice_fixed_size(n, step, size):
yield x[j:], x[:-j]
yield x[...,j:], x[...,:-j]

# An array with zero stride internal overlap
strides = list(x.strides)
strides[0] = 0
xp = as_strided(x, shape=x.shape, strides=strides)
yield x, xp
yield xp, xp

# An array with non-zero stride internal overlap
strides = list(x.strides)
if strides[0] > 1:
strides[0] = 1
xp = as_strided(x, shape=x.shape, strides=strides)
yield x, xp
yield xp, xp

# Then discontiguous views
while True:
steps = tuple(rng.randint(1, 11, dtype=np.intp)
Expand Down Expand Up @@ -579,6 +594,27 @@ def view_element_first_byte(x):
return np.asarray(DummyArray(interface, x))


def assert_copy_equivalent(operation, args, out, **kwargs):
"""
Check that operation(*args, out=out) produces results
equivalent to out[...] = operation(*args, out=out.copy())
"""

kwargs['out'] = out
kwargs2 = dict(kwargs)
kwargs2['out'] = out.copy()

out_orig = out.copy()
out[...] = operation(*args, **kwargs2)
expected = out.copy()
out[...] = out_orig

got = operation(*args, **kwargs).copy()

if (got != expected).any():
assert_equal(got, expected)


class TestUFunc(object):
"""
Test ufunc call memory overlap handling
Expand All @@ -605,12 +641,7 @@ def check_unary_fuzz(self, operation, get_out_axis_size, dtype=np.int16,
b_orig = b.copy()

if get_out_axis_size is None:
bx = b.copy()
cx = operation(a, out=bx)
c = operation(a, out=b)

if (c != cx).any():
assert_equal(c, cx)
assert_copy_equivalent(operation, [a], out=b)

if np.shares_memory(a, b):
overlapping += 1
Expand Down Expand Up @@ -650,12 +681,7 @@ def check_unary_fuzz(self, operation, get_out_axis_size, dtype=np.int16,
overlapping += 1

# Check result
bx = b_out.copy()
cx = operation(a, out=bx, axis=axis)
c = operation(a, out=b_out, axis=axis)

if (c != cx).any():
assert_equal(c, cx)
assert_copy_equivalent(operation, [a], out=b_out, axis=axis)

def test_unary_ufunc_call_fuzz(self):
self.check_unary_fuzz(np.invert, None, np.int16)
Expand Down Expand Up @@ -757,12 +783,8 @@ def test_unary_gufunc_fuzz(self):
if np.shares_memory(a, b):
overlapping += 1

bx = b.copy()
cx = gufunc(a, out=bx)
c = gufunc(a, out=b)

if (c != cx).any():
assert_equal(c, cx)
with np.errstate(over='ignore', invalid='ignore'):
assert_copy_equivalent(gufunc, [a], out=b)

def test_ufunc_at_manual(self):
def check(ufunc, a, ind, b=None):
Expand Down
12 changes: 10 additions & 2 deletions numpy/core/tests/test_nditer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2408,7 +2408,11 @@ def test_iter_reduction():
assert_equal(i[1].strides, (0,))
# Do the reduction
for x, y in i:
y[...] += x
# Use a for loop instead of ``y[...] += x``
# (equivalent to ``y[...] = y[...].copy() + x``),
# because y has zero strides we use for the reduction
for j in range(len(y)):
y[j] += x[j]
# Since no axes were specified, should have allocated a scalar
assert_equal(i.operands[1].ndim, 0)
assert_equal(i.operands[1], np.sum(a))
Expand Down Expand Up @@ -2459,7 +2463,11 @@ def test_iter_buffering_reduction():
assert_equal(i[1].strides, (0,))
# Do the reduction
for x, y in i:
y[...] += x
# Use a for loop instead of ``y[...] += x``
# (equivalent to ``y[...] = y[...].copy() + x``),
# because y has zero strides we use for the reduction
for j in range(len(y)):
y[j] += x[j]
assert_equal(b, np.sum(a, axis=1))

# Iterator inner double loop was wrong on this one
Expand Down

0 comments on commit f7381b0

Please sign in to comment.