diff --git a/doc/source/io.rst b/doc/source/io.rst index 1aa6dde2c08b4..a0807088b2cf5 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -809,6 +809,8 @@ two extra parameters: String value 'infer' can be used to instruct the parser to try detecting the column specifications from the first 100 rows of the data. Default behaviour, if not specified, is to infer. + As with regular Python slices, you can slice to the end of the line + with ``None``, e.g. ``colspecs = [(0, 1), (1, None)]``. - ``widths``: A list of field widths which can be used instead of 'colspecs' if the intervals are contiguous. diff --git a/doc/source/release.rst b/doc/source/release.rst index 1d48674727d51..8422efd4247d1 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -479,6 +479,9 @@ Bug Fixes claim that they contained all the things (:issue:`7066`). - Bug in ``DataFrame.boxplot`` where it failed to use the axis passed as the ``ax`` argument (:issue:`3578`) - Bug in the ``XlsxWriter`` and ``XlwtWriter`` implementations that resulted in datetime columns being formatted without the time (:issue:`7075`) + were being passed to plotting method +- :func:`read_fwf` treats ``None`` in ``colspecs`` like regular Python slices. 
It now reads from the beginning + or until the end of the line when ``colspecs`` contains a ``None`` (previously raised a ``TypeError``) (:issue:`7079`) pandas 0.13.1 ------------- diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 4898fabfcd2b4..bd53caf98f6b2 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2232,10 +2232,11 @@ def __init__(self, f, colspecs, delimiter, comment): "input was a %r" % type(colspecs).__name__) for colspec in self.colspecs: + if not (isinstance(colspec, (tuple, list)) and len(colspec) == 2 and - isinstance(colspec[0], (int, np.integer)) and - isinstance(colspec[1], (int, np.integer))): + isinstance(colspec[0], (int, np.integer, type(None))) and + isinstance(colspec[1], (int, np.integer, type(None)))): raise TypeError('Each column specification must be ' '2 element tuple or list of integers') diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 2a31eb9608001..ab9912d9b20bb 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -2326,6 +2326,33 @@ def test_fwf_colspecs_is_list_or_tuple_of_two_element_tuples(self): 'Each column specification must be.+'): read_fwf(StringIO(self.data1), [('a', 1)]) + def test_fwf_colspecs_None(self): + # GH 7079 + data = """\ +123456 +456789 +""" + colspecs = [(0, 3), (3, None)] + result = read_fwf(StringIO(data), colspecs=colspecs, header=None) + expected = DataFrame([[123, 456], [456, 789]]) + tm.assert_frame_equal(result, expected) + + colspecs = [(None, 3), (3, 6)] + result = read_fwf(StringIO(data), colspecs=colspecs, header=None) + expected = DataFrame([[123, 456], [456, 789]]) + tm.assert_frame_equal(result, expected) + + colspecs = [(0, None), (3, None)] + result = read_fwf(StringIO(data), colspecs=colspecs, header=None) + expected = DataFrame([[123456, 456], [456789, 789]]) + tm.assert_frame_equal(result, expected) + + colspecs = [(None, None), (3, 6)] + result = read_fwf(StringIO(data), colspecs=colspecs, header=None) + 
expected = DataFrame([[123456, 456], [456789, 789]]) + tm.assert_frame_equal(result, expected) + + def test_fwf_regression(self): # GH 3594 #### turns out 'T060' is parsable as a datetime slice!