BUG: None in read_fwf colspec should work like empty slice

DOC: update read_fwf docs
yoni · May 8, 2014 · b7b6b1f · b7b6b1f
1 parent fa1f585
commit b7b6b1f
Show file tree

Hide file tree

Showing 4 changed files with 35 additions and 2 deletions.
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -809,6 +809,8 @@ two extra parameters:
     String value 'infer' can be used to instruct the parser to try detecting
     the column specifications from the first 100 rows of the data. Default
     behaviour, if not specified, is to infer.
+    As with regular Python slices, you can slice to the end of the line
+    with ``None``, e.g. ``colspecs = [(0, 1), (1, None)]``.
   - ``widths``: A list of field widths which can be used instead of 'colspecs'
     if the intervals are contiguous.
 

diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -479,6 +479,9 @@ Bug Fixes
   claim that they contained all the things (:issue:`7066`).
 - Bug in ``DataFrame.boxplot`` where it failed to use the axis passed as the ``ax`` argument (:issue:`3578`)
 - Bug in the ``XlsxWriter`` and ``XlwtWriter`` implementations that resulted in datetime columns being formatted without the time (:issue:`7075`)
+  were being passed to plotting method
+- :func:`read_fwf` treats ``None`` in ``colspecs`` like regular Python slices. It now reads from the beginning
+  or until the end of the line when ``colspecs`` contains a ``None`` (previously raised a ``TypeError``)
 
 pandas 0.13.1
 -------------

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -2232,10 +2232,11 @@ def __init__(self, f, colspecs, delimiter, comment):
                             "input was a %r" % type(colspecs).__name__)
 
         for colspec in self.colspecs:
+
             if not (isinstance(colspec, (tuple, list)) and
                     len(colspec) == 2 and
-                    isinstance(colspec[0], (int, np.integer)) and
-                    isinstance(colspec[1], (int, np.integer))):
+                    isinstance(colspec[0], (int, np.integer, type(None))) and
+                    isinstance(colspec[1], (int, np.integer, type(None)))):
                 raise TypeError('Each column specification must be '
                                 '2 element tuple or list of integers')
 

diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -2326,6 +2326,33 @@ def test_fwf_colspecs_is_list_or_tuple_of_two_element_tuples(self):
                                    'Each column specification must be.+'):
             read_fwf(StringIO(self.data1), [('a', 1)])
 
+    def test_fwf_colspecs_None(self):
+        # GH 7079
+        data = """\
+123456
+456789
+"""
+        colspecs = [(0, 3), (3, None)]
+        result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
+        expected = DataFrame([[123, 456], [456, 789]])
+        tm.assert_frame_equal(result, expected)
+
+        colspecs = [(None, 3), (3, 6)]
+        result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
+        expected = DataFrame([[123, 456], [456, 789]])
+        tm.assert_frame_equal(result, expected)
+
+        colspecs = [(0, None), (3, None)]
+        result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
+        expected = DataFrame([[123456, 456], [456789, 789]])
+        tm.assert_frame_equal(result, expected)
+
+        colspecs = [(None, None), (3, 6)]
+        result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
+        expected = DataFrame([[123456, 456], [456789, 789]])
+        tm.assert_frame_equal(result, expected)
+
+
     def test_fwf_regression(self):
         # GH 3594
         #### turns out 'T060' is parsable as a datetime slice!